Btrfs-progs: add skinny metadata support to progs V3
author Josef Bacik <jbacik@fusionio.com>
Fri, 15 Mar 2013 19:32:16 +0000 (15:32 -0400)
committer David Sterba <dsterba@suse.cz>
Tue, 23 Apr 2013 16:56:20 +0000 (18:56 +0200)
This fixes up the progs to properly deal with skinny metadata.  This adds the -x
option to mkfs and btrfstune for enabling the skinny metadata option.  This also
makes changes to fsck so it can properly deal with the skinny metadata entries.
Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
btrfs-image.c
btrfstune.c
cmds-check.c
ctree.c
ctree.h
extent-tree.c
man/mkfs.btrfs.8.in
mkfs.c
print-tree.c

index 9e13978..9a0a249 100644 (file)
@@ -823,13 +823,18 @@ static int create_metadump(const char *input, FILE *out, int num_threads,
 
                btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
                if (key.objectid < bytenr ||
-                   key.type != BTRFS_EXTENT_ITEM_KEY) {
+                   (key.type != BTRFS_EXTENT_ITEM_KEY &&
+                    key.type != BTRFS_METADATA_ITEM_KEY)) {
                        path->slots[0]++;
                        continue;
                }
 
                bytenr = key.objectid;
-               num_bytes = key.offset;
+               /* skinny keys keep the level in offset, so use the leafsize */
+               if (key.type == BTRFS_METADATA_ITEM_KEY)
+                       num_bytes = root->leafsize;
+               else
+                       num_bytes = key.offset;
 
                if (btrfs_item_size_nr(leaf, path->slots[0]) > sizeof(*ei)) {
                        ei = btrfs_item_ptr(leaf, path->slots[0],
index 2f3d087..993f2d2 100644 (file)
@@ -81,11 +81,32 @@ int enable_extrefs_flag(struct btrfs_root *root)
        return 0;
 }
 
+/*
+ * Set the SKINNY_METADATA incompat bit in the super block copy held by
+ * root->fs_info; the update is made between a transaction start and commit.
+ */
+int enable_skinny_metadata(struct btrfs_root *root)
+{
+       struct btrfs_trans_handle *trans;
+       struct btrfs_super_block *disk_super;
+       u64 super_flags;
+
+       disk_super = &root->fs_info->super_copy;
+       super_flags = btrfs_super_incompat_flags(disk_super);
+       super_flags |= BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA;
+       trans = btrfs_start_transaction(root, 1);
+       btrfs_set_super_incompat_flags(disk_super, super_flags);
+       btrfs_commit_transaction(trans, root);
+
+       return 0;
+}
+
 static void print_usage(void)
 {
        fprintf(stderr, "usage: btrfstune [options] device\n");
        fprintf(stderr, "\t-S value\tenable/disable seeding\n");
        fprintf(stderr, "\t-r \t\tenable extended inode refs\n");
+       fprintf(stderr, "\t-x \t\tenable skinny metadata extent refs\n");
 }
 
 int main(int argc, char *argv[])
@@ -95,10 +112,11 @@ int main(int argc, char *argv[])
        int extrefs_flag = 0;
        int seeding_flag = 0;
        int seeding_value = 0;
+       int skinny_flag = 0;
        int ret;
 
        while(1) {
-               int c = getopt(argc, argv, "S:r");
+               int c = getopt(argc, argv, "S:rx");
                if (c < 0)
                        break;
                switch(c) {
@@ -109,6 +127,9 @@ int main(int argc, char *argv[])
                case 'r':
                        extrefs_flag = 1;
                        break;
+               case 'x':
+                       skinny_flag = 1;
+                       break;
                default:
                        print_usage();
                        return 1;
@@ -145,6 +166,11 @@ int main(int argc, char *argv[])
                success++;
        }
 
+       if (skinny_flag) {
+               enable_skinny_metadata(root);
+               success++;
+       }
+
        if (success > 0) {
                ret = 0;
        } else {
index 12192fa..6622ea8 100644 (file)
@@ -43,6 +43,7 @@ static u64 bytes_used = 0;
 static u64 total_csum_bytes = 0;
 static u64 total_btree_bytes = 0;
 static u64 total_fs_tree_bytes = 0;
+static u64 total_extent_tree_bytes = 0;
 static u64 btree_space_waste = 0;
 static u64 data_bytes_allocated = 0;
 static u64 data_bytes_referenced = 0;
@@ -91,6 +92,7 @@ struct extent_record {
        unsigned int content_checked:1;
        unsigned int owner_ref_checked:1;
        unsigned int is_root:1;
+       unsigned int metadata:1;
 };
 
 struct inode_backref {
@@ -1208,7 +1210,7 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
        WARN_ON(*level >= BTRFS_MAX_LEVEL);
        ret = btrfs_lookup_extent_info(NULL, root,
                                       path->nodes[*level]->start,
-                                      path->nodes[*level]->len, &refs, NULL);
+                                      *level, 1, &refs, NULL);
        if (ret < 0)
                goto out;
 
@@ -1236,8 +1238,8 @@ static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
                bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
                ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
                blocksize = btrfs_level_size(root, *level - 1);
-               ret = btrfs_lookup_extent_info(NULL, root, bytenr, blocksize,
-                                              &refs, NULL);
+               ret = btrfs_lookup_extent_info(NULL, root, bytenr, *level - 1,
+                                              1, &refs, NULL);
                if (ret < 0)
                        refs = 0;
 
@@ -2224,7 +2226,7 @@ static int add_extent_rec(struct cache_tree *extent_cache,
                          struct btrfs_key *parent_key,
                          u64 start, u64 nr, u64 extent_item_refs,
                          int is_root, int inc_ref, int set_checked,
-                         u64 max_size)
+                         int metadata, u64 max_size)
 {
        struct extent_record *rec;
        struct cache_extent *cache;
@@ -2277,6 +2279,7 @@ static int add_extent_rec(struct cache_tree *extent_cache,
        rec->nr = max(nr, max_size);
        rec->content_checked = 0;
        rec->owner_ref_checked = 0;
+       rec->metadata = metadata;
        INIT_LIST_HEAD(&rec->backrefs);
 
        if (is_root)
@@ -2320,7 +2323,8 @@ static int add_tree_backref(struct cache_tree *extent_cache, u64 bytenr,
 
        cache = find_cache_extent(extent_cache, bytenr, 1);
        if (!cache) {
-               add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0, 0);
+               add_extent_rec(extent_cache, NULL, bytenr,
+                              1, 0, 0, 0, 0, 1, 0);
                cache = find_cache_extent(extent_cache, bytenr, 1);
                if (!cache)
                        abort();
@@ -2368,7 +2372,7 @@ static int add_data_backref(struct cache_tree *extent_cache, u64 bytenr,
        cache = find_cache_extent(extent_cache, bytenr, 1);
        if (!cache) {
                add_extent_rec(extent_cache, NULL, bytenr, 1, 0, 0, 0, 0,
-                              max_size);
+                              0, max_size);
                cache = find_cache_extent(extent_cache, bytenr, 1);
                if (!cache)
                        abort();
@@ -2506,7 +2510,8 @@ static int process_extent_ref_v0(struct cache_tree *extent_cache,
 }
 #endif
 
-static int process_extent_item(struct cache_tree *extent_cache,
+static int process_extent_item(struct btrfs_root *root,
+                              struct cache_tree *extent_cache,
                               struct extent_buffer *eb, int slot)
 {
        struct btrfs_extent_item *ei;
@@ -2520,9 +2525,18 @@ static int process_extent_item(struct cache_tree *extent_cache,
        u32 item_size = btrfs_item_size_nr(eb, slot);
        u64 refs = 0;
        u64 offset;
+       u64 num_bytes;
+       int metadata = 0;
 
        btrfs_item_key_to_cpu(eb, &key, slot);
 
+       if (key.type == BTRFS_METADATA_ITEM_KEY) {
+               metadata = 1;
+               num_bytes = root->leafsize;
+       } else {
+               num_bytes = key.offset;
+       }
+
        if (item_size < sizeof(*ei)) {
 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0
                struct btrfs_extent_item_v0 *ei0;
@@ -2533,17 +2547,19 @@ static int process_extent_item(struct cache_tree *extent_cache,
                BUG();
 #endif
                return add_extent_rec(extent_cache, NULL, key.objectid,
-                                     key.offset, refs, 0, 0, 0, key.offset);
+                                     num_bytes, refs, 0, 0, 0, metadata,
+                                     num_bytes);
        }
 
        ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
        refs = btrfs_extent_refs(eb, ei);
 
-       add_extent_rec(extent_cache, NULL, key.objectid, key.offset,
-                      refs, 0, 0, 0, key.offset);
+       add_extent_rec(extent_cache, NULL, key.objectid, num_bytes,
+                      refs, 0, 0, 0, metadata, num_bytes);
 
        ptr = (unsigned long)(ei + 1);
-       if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)
+       if (btrfs_extent_flags(eb, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK &&
+           key.type == BTRFS_EXTENT_ITEM_KEY)
                ptr += sizeof(struct btrfs_tree_block_info);
 
        end = (unsigned long)ei + item_size;
@@ -2568,18 +2584,18 @@ static int process_extent_item(struct cache_tree *extent_cache,
                                                                       dref),
                                        btrfs_extent_data_ref_offset(eb, dref),
                                        btrfs_extent_data_ref_count(eb, dref),
-                                       0, key.offset);
+                                       0, num_bytes);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY:
                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
                        add_data_backref(extent_cache, key.objectid, offset,
                                        0, 0, 0,
                                        btrfs_shared_data_ref_count(eb, sref),
-                                       0, key.offset);
+                                       0, num_bytes);
                        break;
                default:
                        fprintf(stderr, "corrupt extent record: key %Lu %u %Lu\n",
                                key.objectid, key.type, key.offset);
                        goto out;
                }
                ptr += btrfs_extent_inline_ref_size(type);
@@ -2657,7 +2673,9 @@ static int run_next_block(struct btrfs_root *root,
 
        nritems = btrfs_header_nritems(buf);
 
-       ret = btrfs_lookup_extent_info(NULL, root, bytenr, size, NULL, &flags);
+       ret = btrfs_lookup_extent_info(NULL, root, bytenr,
+                                      btrfs_header_level(buf), 1, NULL,
+                                      &flags);
        if (ret < 0)
                flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
 
@@ -2679,7 +2697,13 @@ static int run_next_block(struct btrfs_root *root,
                        struct btrfs_file_extent_item *fi;
                        btrfs_item_key_to_cpu(buf, &key, i);
                        if (key.type == BTRFS_EXTENT_ITEM_KEY) {
-                               process_extent_item(extent_cache, buf, i);
+                               process_extent_item(root, extent_cache, buf,
+                                                   i);
+                               continue;
+                       }
+                       if (key.type == BTRFS_METADATA_ITEM_KEY) {
+                               process_extent_item(root, extent_cache, buf,
+                                                   i);
                                continue;
                        }
                        if (key.type == BTRFS_EXTENT_CSUM_KEY) {
@@ -2753,7 +2777,7 @@ static int run_next_block(struct btrfs_root *root,
                        ret = add_extent_rec(extent_cache, NULL,
                                   btrfs_file_extent_disk_bytenr(buf, fi),
                                   btrfs_file_extent_disk_num_bytes(buf, fi),
-                                  0, 0, 1, 1,
+                                  0, 0, 1, 1, 0,
                                   btrfs_file_extent_disk_num_bytes(buf, fi));
                        add_data_backref(extent_cache,
                                btrfs_file_extent_disk_bytenr(buf, fi),
@@ -2776,7 +2800,7 @@ static int run_next_block(struct btrfs_root *root,
                        u32 size = btrfs_level_size(root, level - 1);
                        btrfs_node_key_to_cpu(buf, &key, i);
                        ret = add_extent_rec(extent_cache, &key,
-                                            ptr, size, 0, 0, 1, 0, size);
+                                            ptr, size, 0, 0, 1, 0, 1, size);
                        BUG_ON(ret);
 
                        add_tree_backref(extent_cache, ptr, parent, owner, 1);
@@ -2793,6 +2817,8 @@ static int run_next_block(struct btrfs_root *root,
        total_btree_bytes += buf->len;
        if (fs_root_objectid(btrfs_header_owner(buf)))
                total_fs_tree_bytes += buf->len;
+       if (btrfs_header_owner(buf) == BTRFS_EXTENT_TREE_OBJECTID)
+               total_extent_tree_bytes += buf->len;
        if (!found_old_backref &&
            btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID &&
            btrfs_header_backref_rev(buf) == BTRFS_MIXED_BACKREF_REV &&
@@ -2815,7 +2841,7 @@ static int add_root_to_pending(struct extent_buffer *buf,
        else
                add_pending(pending, seen, buf->start, buf->len);
        add_extent_rec(extent_cache, NULL, buf->start, buf->len,
-                      0, 1, 1, 0, buf->len);
+                      0, 1, 1, 0, 1, buf->len);
 
        if (root_key->objectid == BTRFS_TREE_RELOC_OBJECTID ||
            btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
@@ -2936,6 +2962,7 @@ static int delete_extent_records(struct btrfs_trans_handle *trans,
                        break;
 
                if (found_key.type != BTRFS_EXTENT_ITEM_KEY &&
+                   found_key.type != BTRFS_METADATA_ITEM_KEY &&
                    found_key.type != BTRFS_TREE_BLOCK_REF_KEY &&
                    found_key.type != BTRFS_EXTENT_DATA_REF_KEY &&
                    found_key.type != BTRFS_EXTENT_REF_V0_KEY &&
@@ -2961,9 +2988,13 @@ static int delete_extent_records(struct btrfs_trans_handle *trans,
                        break;
                btrfs_release_path(NULL, path);
 
-               if (found_key.type == BTRFS_EXTENT_ITEM_KEY) {
+               if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
+                   found_key.type == BTRFS_METADATA_ITEM_KEY) {
+                       u64 bytes = (found_key.type == BTRFS_EXTENT_ITEM_KEY) ?
+                               found_key.offset : root->leafsize;
+
                        ret = btrfs_update_block_group(trans, root, bytenr,
-                                                      found_key.offset, 0, 0);
+                                                      bytes, 0, 0);
                        if (ret)
                                break;
                }
@@ -3128,7 +3159,8 @@ static int fixup_extent_refs(struct btrfs_trans_handle *trans,
 
        /* remember our flags for recreating the extent */
        ret = btrfs_lookup_extent_info(NULL, info->extent_root, rec->start,
-                                      rec->max_size, NULL, &flags);
+                                      rec->max_size, rec->metadata, NULL,
+                                      &flags);
        if (ret < 0)
                flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
 
@@ -3677,6 +3709,8 @@ out:
               (unsigned long long)total_btree_bytes);
        printf("total fs tree bytes: %llu\n",
               (unsigned long long)total_fs_tree_bytes);
+       printf("total extent tree bytes: %llu\n",
+              (unsigned long long)total_extent_tree_bytes);
        printf("btree space waste bytes: %llu\n",
               (unsigned long long)btree_space_waste);
        printf("file data blocks allocated: %llu\n referenced %llu\n",
diff --git a/ctree.c b/ctree.c
index 1778a51..16f4daa 100644 (file)
--- a/ctree.c
+++ b/ctree.c
@@ -235,7 +235,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
 
        if (btrfs_block_can_be_shared(root, buf)) {
                ret = btrfs_lookup_extent_info(trans, root, buf->start,
-                                              buf->len, &refs, &flags);
+                                              btrfs_header_level(buf), 1,
+                                              &refs, &flags);
                BUG_ON(ret);
                BUG_ON(refs == 0);
        } else {
@@ -277,7 +278,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
                }
                if (new_flags != 0) {
                        ret = btrfs_set_block_flags(trans, root, buf->start,
-                                                   buf->len, new_flags);
+                                                   btrfs_header_level(buf),
+                                                   new_flags);
                        BUG_ON(ret);
                }
        } else {
diff --git a/ctree.h b/ctree.h
index 953cc6e..b8f8146 100644 (file)
--- a/ctree.h
+++ b/ctree.h
@@ -454,9 +454,10 @@ struct btrfs_super_block {
  * code was pretty buggy.  Lets not let them try anymore.
  */
 #define BTRFS_FEATURE_INCOMPAT_BIG_METADATA     (1ULL << 5)
+#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF   (1ULL << 6)
 #define BTRFS_FEATURE_INCOMPAT_RAID56          (1ULL << 7)
+#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
 
-#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF   (1ULL << 6)
 
 #define BTRFS_FEATURE_COMPAT_SUPP              0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP           0ULL
@@ -467,7 +468,8 @@ struct btrfs_super_block {
         BTRFS_FEATURE_INCOMPAT_BIG_METADATA |          \
         BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |         \
         BTRFS_FEATURE_INCOMPAT_RAID56 |                \
-        BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
+        BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |          \
+        BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
 
 /*
  * A leaf is full of items. offset and size tell us where to find
@@ -1043,6 +1045,12 @@ struct btrfs_root {
  */
 #define BTRFS_EXTENT_ITEM_KEY  168
 
+/*
+ * The same as BTRFS_EXTENT_ITEM_KEY, except that for metadata we already know
+ * the length, so we store the level in key->offset instead of the length.
+ */
+#define BTRFS_METADATA_ITEM_KEY        169
+
 #define BTRFS_TREE_BLOCK_REF_KEY       176
 
 #define BTRFS_EXTENT_DATA_REF_KEY      178
@@ -2033,6 +2041,13 @@ static inline u32 btrfs_level_size(struct btrfs_root *root, int level) {
        return root->nodesize;
 }
 
+static inline int btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
+{
+       /* nonzero iff any bit of flag is set in the super's incompat flags */
+       u64 incompat = btrfs_super_incompat_flags(&fs_info->super_copy);
+       return (incompat & flag) != 0;
+}
+
 /* helper function to cast into the data area of the leaf. */
 #define btrfs_item_ptr(leaf, slot, type) \
        ((type *)(btrfs_leaf_data(leaf) + \
@@ -2070,10 +2085,10 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
                       u64 search_end, struct btrfs_key *ins, int data);
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 bytenr,
-                            u64 num_bytes, u64 *refs, u64 *flags);
+                            u64 offset, int metadata, u64 *refs, u64 *flags);
 int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
-                         u64 bytenr, u64 num_bytes, u64 flags);
+                         u64 bytenr, int level, u64 flags);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                  struct extent_buffer *buf, int record_parent);
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
index 2d82977..ed4756f 100644 (file)
@@ -115,7 +115,8 @@ static int cache_block_group(struct btrfs_root *root,
        last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
        key.objectid = last;
        key.offset = 0;
-       btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+       key.type = 0;
+
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
@@ -142,14 +143,18 @@ static int cache_block_group(struct btrfs_root *root,
                        break;
                }
 
-               if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
+               if (key.type == BTRFS_EXTENT_ITEM_KEY ||
+                   key.type == BTRFS_METADATA_ITEM_KEY) {
                        if (key.objectid > last) {
                                hole_size = key.objectid - last;
                                set_extent_dirty(free_space_cache, last,
                                                 last + hole_size - 1,
                                                 GFP_NOFS);
                        }
-                       last = key.objectid + key.offset;
+                       if (key.type == BTRFS_METADATA_ITEM_KEY)
+                               last = key.objectid + root->leafsize;
+                       else
+                               last = key.objectid + key.offset;
                }
 next:
                path->slots[0]++;
@@ -1024,6 +1029,9 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
        int want;
        int ret;
        int err = 0;
+       int skinny_metadata =
+               btrfs_fs_incompat(root->fs_info,
+                                 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
 
        key.objectid = bytenr;
        key.type = BTRFS_EXTENT_ITEM_KEY;
@@ -1034,11 +1042,47 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
                extra_size = btrfs_extent_inline_ref_size(want);
        else
                extra_size = -1;
+
+       if (owner < BTRFS_FIRST_FREE_OBJECTID && skinny_metadata) {
+               skinny_metadata = 1;
+               key.type = BTRFS_METADATA_ITEM_KEY;
+               key.offset = owner;
+       } else if (skinny_metadata) {
+               skinny_metadata = 0;
+       }
+
+again:
        ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
        if (ret < 0) {
                err = ret;
                goto out;
        }
+
+       /*
+        * We may be a newly converted file system which still has the old fat
+        * extent entries for metadata, so try and see if we have one of those.
+        */
+       if (ret > 0 && skinny_metadata) {
+               skinny_metadata = 0;
+               if (path->slots[0]) {
+                       path->slots[0]--;
+                       btrfs_item_key_to_cpu(path->nodes[0], &key,
+                                             path->slots[0]);
+                       if (key.objectid == bytenr &&
+                           key.type == BTRFS_EXTENT_ITEM_KEY &&
+                           key.offset == num_bytes)
+                               ret = 0;
+               }
+               if (ret) {
+                       /* the peek above overwrote key; rebuild all of it */
+                       btrfs_release_path(root, path);
+                       key.objectid = bytenr;
+                       key.type = BTRFS_EXTENT_ITEM_KEY;
+                       key.offset = num_bytes;
+                       goto again;
+               }
+       }
+
        if (ret) {
                printf("Failed to find [%llu, %u, %llu]\n", key.objectid, key.type, key.offset);
                return -ENOENT;
@@ -1079,10 +1120,10 @@ static int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
        ptr = (unsigned long)(ei + 1);
        end = (unsigned long)ei + item_size;
 
-       if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+       if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !skinny_metadata) {
                ptr += sizeof(struct btrfs_tree_block_info);
                BUG_ON(ptr > end);
-       } else {
+       } else if (!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) {
                if (!(flags & BTRFS_EXTENT_FLAG_DATA)) {
                        return -EIO;
                }
@@ -1439,7 +1480,7 @@ int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
 
 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root, u64 bytenr,
-                            u64 num_bytes, u64 *refs, u64 *flags)
+                            u64 offset, int metadata, u64 *refs, u64 *flags)
 {
        struct btrfs_path *path;
        int ret;
@@ -1450,16 +1491,55 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
        u64 num_refs;
        u64 extent_flags;
 
-       WARN_ON(num_bytes < root->sectorsize);
+       if (metadata &&
+           !btrfs_fs_incompat(root->fs_info,
+                              BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) {
+               offset = root->leafsize;
+               metadata = 0;
+       }
+
        path = btrfs_alloc_path();
        path->reada = 1;
+
        key.objectid = bytenr;
-       key.offset = num_bytes;
-       btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+       key.offset = offset;
+       if (metadata)
+               key.type = BTRFS_METADATA_ITEM_KEY;
+       else
+               key.type = BTRFS_EXTENT_ITEM_KEY;
+
+again:
        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
                                0, 0);
        if (ret < 0)
                goto out;
+
+       /*
+        * Deal with the fact that we may have mixed SKINNY and normal refs.  If
+        * there was no exact match, accept a skinny item for this bytenr that
+        * sits just before the returned slot; otherwise re-search for the
+        * normal ref, resetting the whole key first.
+        */
+       if (ret > 0 && metadata) {
+               if (path->slots[0]) {
+                       path->slots[0]--;
+                       btrfs_item_key_to_cpu(path->nodes[0], &key,
+                                             path->slots[0]);
+                       if (key.objectid == bytenr &&
+                           key.type == BTRFS_METADATA_ITEM_KEY)
+                               ret = 0;
+               }
+
+               if (ret) {
+                       btrfs_release_path(root, path);
+                       key.objectid = bytenr;
+                       key.type = BTRFS_EXTENT_ITEM_KEY;
+                       key.offset = root->leafsize;
+                       metadata = 0;
+                       goto again;
+               }
+       }
+
        if (ret != 0) {
                ret = -EIO;
                goto out;
@@ -1497,7 +1576,7 @@ out:
 
 int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
-                         u64 bytenr, u64 num_bytes, u64 flags)
+                         u64 bytenr, int level, u64 flags)
 {
        struct btrfs_path *path;
        int ret;
@@ -1505,17 +1584,52 @@ int btrfs_set_block_flags(struct btrfs_trans_handle *trans,
        struct extent_buffer *l;
        struct btrfs_extent_item *item;
        u32 item_size;
+       int skinny_metadata =
+               btrfs_fs_incompat(root->fs_info,
+                                 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
 
-       WARN_ON(num_bytes < root->sectorsize);
        path = btrfs_alloc_path();
        path->reada = 1;
+
        key.objectid = bytenr;
-       key.offset = num_bytes;
-       btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+       if (skinny_metadata) {
+               key.offset = level;
+               key.type = BTRFS_METADATA_ITEM_KEY;
+       } else {
+               key.offset = root->leafsize;
+               key.type = BTRFS_EXTENT_ITEM_KEY;
+       }
+
+again:
        ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
                                0, 0);
        if (ret < 0)
                goto out;
+
+       /*
+        * No skinny item: accept an old-style extent item sitting just before
+        * the returned slot, otherwise rebuild the old-style key (the peek
+        * overwrites key) and re-search.
+        */
+       if (ret > 0 && skinny_metadata) {
+               skinny_metadata = 0;
+               if (path->slots[0]) {
+                       path->slots[0]--;
+                       btrfs_item_key_to_cpu(path->nodes[0], &key,
+                                             path->slots[0]);
+                       if (key.objectid == bytenr &&
+                           key.offset == root->leafsize &&
+                           key.type == BTRFS_EXTENT_ITEM_KEY)
+                               ret = 0;
+               }
+               if (ret) {
+                       btrfs_release_path(root, path);
+                       key.objectid = bytenr;
+                       key.offset = root->leafsize;
+                       key.type = BTRFS_EXTENT_ITEM_KEY;
+                       goto again;
+               }
+       }
+
        if (ret != 0) {
                btrfs_print_leaf(root, path->nodes[0]);
                printk("failed to find block number %Lu\n",
@@ -1985,6 +2094,9 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
        struct pending_extent_op *extent_op;
        struct btrfs_key key;
        int ret;
+       int skinny_metadata =
+               btrfs_fs_incompat(extent_root->fs_info,
+                                 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
 
        path = btrfs_alloc_path();
 
@@ -2000,14 +2112,19 @@ static int finish_current_insert(struct btrfs_trans_handle *trans,
 
                if (extent_op->type == PENDING_EXTENT_INSERT) {
                        key.objectid = start;
-                       key.offset = end + 1 - start;
-                       key.type = BTRFS_EXTENT_ITEM_KEY;
+                       if (skinny_metadata) {
+                               key.offset = extent_op->level;
+                               key.type = BTRFS_METADATA_ITEM_KEY;
+                       } else {
+                               key.offset = extent_op->num_bytes;
+                               key.type = BTRFS_EXTENT_ITEM_KEY;
+                       }
                        ret = alloc_reserved_tree_block(trans, extent_root,
                                                extent_root->root_key.objectid,
                                                trans->transid,
                                                extent_op->flags,
                                                &extent_op->key,
-                                               extent_op->level, &key);
+                                               extent_op->level, &key);
                } else {
                        BUG_ON(1);
                }
@@ -2088,6 +2205,9 @@ static int __free_extent(struct btrfs_trans_handle *trans,
        int num_to_del = 1;
        u32 item_size;
        u64 refs;
+       int skinny_metadata =
+               btrfs_fs_incompat(extent_root->fs_info,
+                                 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
 
        if (root->fs_info->free_extent_hook) {
                root->fs_info->free_extent_hook(trans, root, bytenr, num_bytes,
@@ -2103,6 +2223,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
        path->leave_spinning = 1;
 
        is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
+       if (is_data)
+               skinny_metadata = 0;
        BUG_ON(!is_data && refs_to_drop != 1);
 
        ret = lookup_extent_backref(trans, extent_root, path, &iref,
@@ -2121,6 +2243,11 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                                found_extent = 1;
                                break;
                        }
+                       if (key.type == BTRFS_METADATA_ITEM_KEY &&
+                           key.offset == owner_objectid) {
+                               found_extent = 1;
+                               break;
+                       }
                        if (path->slots[0] - extent_slot > 5)
                                break;
                        extent_slot--;
@@ -2140,11 +2267,37 @@ static int __free_extent(struct btrfs_trans_handle *trans,
                        path->leave_spinning = 1;
 
                        key.objectid = bytenr;
-                       key.type = BTRFS_EXTENT_ITEM_KEY;
-                       key.offset = num_bytes;
+
+                       if (skinny_metadata) {
+                               key.type = BTRFS_METADATA_ITEM_KEY;
+                               key.offset = owner_objectid;
+                       } else {
+                               key.type = BTRFS_EXTENT_ITEM_KEY;
+                               key.offset = num_bytes;
+                       }
 
                        ret = btrfs_search_slot(trans, extent_root,
                                                &key, path, -1, 1);
+                       if (ret > 0 && skinny_metadata && path->slots[0]) {
+                               path->slots[0]--;
+                               btrfs_item_key_to_cpu(path->nodes[0],
+                                                     &key,
+                                                     path->slots[0]);
+                               if (key.objectid == bytenr &&
+                                   key.type == BTRFS_EXTENT_ITEM_KEY &&
+                                   key.offset == num_bytes)
+                                       ret = 0;
+                       }
+
+                       if (ret > 0 && skinny_metadata) {
+                               skinny_metadata = 0;
+                               btrfs_release_path(extent_root, path);
+                               key.type = BTRFS_EXTENT_ITEM_KEY;
+                               key.offset = num_bytes;
+                               ret = btrfs_search_slot(trans, extent_root,
+                                                       &key, path, -1, 1);
+                       }
+
                        if (ret) {
                                printk(KERN_ERR "umm, got %d back from search"
                                       ", was looking for %llu\n", ret,
@@ -2199,7 +2352,8 @@ static int __free_extent(struct btrfs_trans_handle *trans,
        BUG_ON(item_size < sizeof(*ei));
        ei = btrfs_item_ptr(leaf, extent_slot,
                            struct btrfs_extent_item);
-       if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
+       if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
+           key.type == BTRFS_EXTENT_ITEM_KEY) {
                struct btrfs_tree_block_info *bi;
                BUG_ON(item_size < sizeof(*ei) + sizeof(*bi));
                bi = (struct btrfs_tree_block_info *)(ei + 1);
@@ -2571,7 +2725,13 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        struct btrfs_extent_inline_ref *iref;
        struct btrfs_path *path;
        struct extent_buffer *leaf;
-       u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref);
+       u32 size = sizeof(*extent_item) + sizeof(*iref);
+       int skinny_metadata =
+               btrfs_fs_incompat(fs_info,
+                                 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA);
+
+       if (!skinny_metadata)
+               size += sizeof(*block_info);
 
        path = btrfs_alloc_path();
        BUG_ON(!path);
@@ -2588,19 +2748,23 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        btrfs_set_extent_generation(leaf, extent_item, generation);
        btrfs_set_extent_flags(leaf, extent_item,
                               flags | BTRFS_EXTENT_FLAG_TREE_BLOCK);
-       block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
 
-       btrfs_set_tree_block_key(leaf, block_info, key);
-       btrfs_set_tree_block_level(leaf, block_info, level);
+       if (skinny_metadata) {
+               iref = (struct btrfs_extent_inline_ref *)(extent_item + 1);
+       } else {
+               block_info = (struct btrfs_tree_block_info *)(extent_item + 1);
+               btrfs_set_tree_block_key(leaf, block_info, key);
+               btrfs_set_tree_block_level(leaf, block_info, level);
+               iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
+       }
 
-       iref = (struct btrfs_extent_inline_ref *)(block_info + 1);
        btrfs_set_extent_inline_ref_type(leaf, iref, BTRFS_TREE_BLOCK_REF_KEY);
        btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
 
        btrfs_mark_buffer_dirty(leaf);
        btrfs_free_path(path);
 
-       ret = update_block_group(trans, root, ins->objectid, ins->offset,
+       ret = update_block_group(trans, root, ins->objectid, root->leafsize,
                                 1, 0);
        return 0;
 }
@@ -2636,6 +2800,11 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
                set_state_private(&root->fs_info->extent_ins,
                                  ins->objectid, (unsigned long)extent_op);
        } else {
+               if (btrfs_fs_incompat(root->fs_info,
+                               BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)) {
+                       ins->offset = level;
+                       ins->type = BTRFS_METADATA_ITEM_KEY;
+               }
                ret = alloc_reserved_tree_block(trans, root, root_objectid,
                                                generation, flags,
                                                key, level, ins);
@@ -3345,6 +3514,8 @@ static int btrfs_count_extents_in_block_group(struct btrfs_root *root,
                        break;
                if (key.type == BTRFS_EXTENT_ITEM_KEY)
                        bytes_used += key.offset;
+               if (key.type == BTRFS_METADATA_ITEM_KEY)
+                       bytes_used += root->leafsize;
                path->slots[0]++;
        }
        *total = bytes_used;
@@ -3457,6 +3628,11 @@ int btrfs_fix_block_accounting(struct btrfs_trans_handle *trans,
                        ret = btrfs_update_block_group(trans, root,
                                  key.objectid, key.offset, 1, 0);
                        BUG_ON(ret);
+               } else if (key.type == BTRFS_METADATA_ITEM_KEY) {
+                       bytes_used += root->leafsize;
+                       ret = btrfs_update_block_group(trans, root,
+                                 key.objectid, root->leafsize, 1, 0);
+                       BUG_ON(ret);
                }
                path.slots[0]++;
        }
index a3f1503..05180f8 100644 (file)
@@ -17,6 +17,7 @@ mkfs.btrfs \- create a btrfs filesystem
 [ \fB\-K\fP ]
 [ \fB\-h\fP ]
 [ \fB\-V\fP ]
+[ \fB\-x\fP ]
 \fI device\fP [ \fIdevice ...\fP ]
 .SH DESCRIPTION
 .B mkfs.btrfs
@@ -79,6 +80,13 @@ Do not perform whole device TRIM operation by default.
 .TP
 \fB\-V\fR, \fB\-\-version\fR
 Print the \fBmkfs.btrfs\fP version and exit.
+.TP
+\fB\-x\fR, \fB\-\-skinny\-extents\fR
+Use the new skinny extents disk format.  This is a non-backwards-compatible
+format change.  This allows btrfs to use a smaller item for describing
+metadata blocks in the extent tree.  This results in a 30% savings in the
+extent tree size, which improves performance on file systems with lots of
+metadata.
 .SH UNIT
 As default the unit is the byte, however it is possible to append a suffix
 to the arguments like \fBk\fP for KBytes, \fBm\fP for MBytes...
diff --git a/mkfs.c b/mkfs.c
index 443ece2..01180c3 100644 (file)
--- a/mkfs.c
+++ b/mkfs.c
@@ -337,6 +337,7 @@ static void print_usage(void)
        fprintf(stderr, "\t -r --rootdir the source directory\n");
        fprintf(stderr, "\t -K --nodiscard do not perform whole device TRIM\n");
        fprintf(stderr, "\t -V --version print the mkfs.btrfs version and exit\n");
+       fprintf(stderr, "\t -x --skinny-extents use the new skinny extent disk format");
        fprintf(stderr, "%s\n", BTRFS_BUILD_VERSION);
        exit(1);
 }
@@ -397,6 +398,7 @@ static struct option long_options[] = {
        { "version", 0, NULL, 'V' },
        { "rootdir", 1, NULL, 'r' },
        { "nodiscard", 0, NULL, 'K' },
+       { "skinny-extents", 0, NULL, 'x'},
        { 0, 0, 0, 0}
 };
 
@@ -1289,6 +1291,7 @@ int main(int ac, char **av)
        int nodiscard = 0;
        int ssd = 0;
        int force_overwrite = 0;
+       int skinny_meta_extents = 0;
 
        char *source_dir = NULL;
        int source_dir_set = 0;
@@ -1304,8 +1307,8 @@ int main(int ac, char **av)
 
        while(1) {
                int c;
-               c = getopt_long(ac, av, "A:b:fl:n:s:m:d:L:r:VMK", long_options,
-                               &option_index);
+               c = getopt_long(ac, av, "A:b:fl:n:s:m:d:L:r:VMKx",
+                               long_options, &option_index);
                if (c < 0)
                        break;
                switch(c) {
@@ -1356,6 +1359,9 @@ int main(int ac, char **av)
                        case 'K':
                                nodiscard=1;
                                break;
+                       case 'x':
+                               skinny_meta_extents = 1;
+                               break;
                        default:
                                print_usage();
                }
@@ -1533,6 +1539,9 @@ raid_groups:
        if (mixed)
                flags |= BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS;
 
+       if (skinny_meta_extents)
+               flags |= BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA;
+
        btrfs_set_super_incompat_flags(super, flags);
 
        if ((data_profile | metadata_profile) &
index c9e891b..49c8384 100644 (file)
@@ -202,7 +202,7 @@ static void print_file_extent_item(struct extent_buffer *eb,
               btrfs_file_extent_compression(eb, fi));
 }
 
-static void print_extent_item(struct extent_buffer *eb, int slot)
+static void print_extent_item(struct extent_buffer *eb, int slot, int metadata)
 {
        struct btrfs_extent_item *ei;
        struct btrfs_extent_inline_ref *iref;
@@ -237,7 +237,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
               (unsigned long long)btrfs_extent_generation(eb, ei),
               (unsigned long long)flags);
 
-       if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
+       if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK && !metadata) {
                struct btrfs_tree_block_info *info;
                info = (struct btrfs_tree_block_info *)(ei + 1);
                btrfs_tree_block_key(eb, info, &key);
@@ -245,7 +245,13 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
                btrfs_print_key(&key);
                printf(" level %d\n", btrfs_tree_block_level(eb, info));
                iref = (struct btrfs_extent_inline_ref *)(info + 1);
-       } else {
+       } else if (metadata) {
+               struct btrfs_key tmp;
+
+               btrfs_item_key_to_cpu(eb, &tmp, slot);
+               printf("\t\ttree block skinny level %d\n", (int)tmp.offset);
+               iref = (struct btrfs_extent_inline_ref *)(ei + 1);
+       } else{
                iref = (struct btrfs_extent_inline_ref *)(ei + 1);
        }
 
@@ -440,6 +446,9 @@ static void print_key_type(u64 objectid, u8 type)
        case BTRFS_EXTENT_ITEM_KEY:
                printf("EXTENT_ITEM");
                break;
+       case BTRFS_METADATA_ITEM_KEY:
+               printf("METADATA_ITEM");
+               break;
        case BTRFS_TREE_BLOCK_REF_KEY:
                printf("TREE_BLOCK_REF");
                break;
@@ -699,7 +708,10 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
                        print_root_ref(l, i, "backref");
                        break;
                case BTRFS_EXTENT_ITEM_KEY:
-                       print_extent_item(l, i);
+                       print_extent_item(l, i, 0);
+                       break;
+               case BTRFS_METADATA_ITEM_KEY:
+                       print_extent_item(l, i, 1);
                        break;
                case BTRFS_TREE_BLOCK_REF_KEY:
                        printf("\t\ttree block backref\n");