early extent mapping support
author Chris Mason <chris.mason@oracle.com>
Tue, 20 Feb 2007 21:40:44 +0000 (16:40 -0500)
committer David Woodhouse <dwmw2@hera.kernel.org>
Tue, 20 Feb 2007 21:40:44 +0000 (16:40 -0500)
Makefile
ctree.c
ctree.h
disk-io.c
disk-io.h

index 63360212a075c4ed22f71bfc752d7cfe5802ba61..df065dd2dce7dc5b42aa7f63ef87a97e61a0a32d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -4,8 +4,8 @@ CFLAGS= -g -Wall
 .c.o:
        $(CC) $(CFLAGS) -c $<
 
-ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h
-       gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o
+ctree: ctree.o disk-io.h ctree.h disk-io.o radix-tree.o radix-tree.h mkfs.o
+       gcc $(CFLAGS) -o ctree ctree.o disk-io.o radix-tree.o mkfs.o
 
 clean:
        rm ctree *.o
diff --git a/ctree.c b/ctree.c
index 6b64f49a0279d07f8fc7552012e3225e77716c8b..2177744dedd3a0c9101e41082016c87613c633ad 100644 (file)
--- a/ctree.c
+++ b/ctree.c
@@ -5,6 +5,8 @@
 #include "ctree.h"
 #include "disk-io.h"
 
+static int refill_alloc_extent(struct ctree_root *root);
+
 static inline void init_path(struct ctree_path *p)
 {
        memset(p, 0, sizeof(*p));
@@ -29,7 +31,7 @@ static inline unsigned int leaf_data_end(struct leaf *leaf)
 {
        unsigned int nr = leaf->header.nritems;
        if (nr == 0)
-               return ARRAY_SIZE(leaf->data);
+               return sizeof(leaf->data);
        return leaf->items[nr-1].offset;
 }
 
@@ -421,7 +423,7 @@ int insert_ptr(struct ctree_root *root,
         * due to splitting.  Once we've done all the splitting required
         * do the inserts based on the data in the bal array.
         */
-       memset(bal, 0, ARRAY_SIZE(bal));
+       memset(bal, 0, sizeof(bal));
        while(t && t->node.header.nritems == NODEPTRS_PER_BLOCK) {
                c = &t->node;
                if (push_node_left(root, path,
@@ -756,6 +758,7 @@ int insert_item(struct ctree_root *root, struct key *key,
        if (leaf_free_space(leaf) < 0)
                BUG();
        release_path(root, &path);
+       refill_alloc_extent(root);
        return 0;
 }
 
@@ -884,6 +887,135 @@ int del_item(struct ctree_root *root, struct ctree_path *path)
        return 0;
 }
 
+/*
+ * Advance path to the first item of the leaf that follows the current one.
+ *
+ * Walks up from level 1 until it finds a node that has a slot to the right
+ * of the one recorded in the path, then walks back down along slot 0 of
+ * each block, replacing path->nodes[] and path->slots[] below that level.
+ *
+ * Returns 0 on success and -1 when there is no leaf to the right; in the
+ * failure case the path is left untouched.
+ *
+ * NOTE(review): the results of read_tree_block() are used unchecked here.
+ */
+int next_leaf(struct ctree_root *root, struct ctree_path *path)
+{
+       int slot = 0;
+       int level;
+       struct tree_buffer *c;
+       struct tree_buffer *next = NULL;
+
+       for (level = 1; level < MAX_LEVEL; level++) {
+               c = path->nodes[level];
+               if (!c)
+                       return -1;
+               slot = path->slots[level] + 1;
+               if (slot < c->node.header.nritems) {
+                       next = read_tree_block(root, c->node.blockptrs[slot]);
+                       break;
+               }
+       }
+       /*
+        * Every level was exhausted without finding a right sibling.  Bail
+        * out before indexing path->slots[MAX_LEVEL] and before using a
+        * next pointer that was never set.
+        */
+       if (level == MAX_LEVEL)
+               return -1;
+
+       path->slots[level] = slot;
+       while (1) {
+               level--;
+               c = path->nodes[level];
+               tree_block_release(root, c);
+               path->nodes[level] = next;
+               path->slots[level] = 0;
+               if (!level)
+                       break;
+               next = read_tree_block(root, next->node.blockptrs[0]);
+       }
+       return 0;
+}
+
+/*
+ * Reserve num_blocks blocks by walking the extent items stored in root.
+ *
+ * Starting at the slot search_slot() returns for objectid search_start, the
+ * loop tracks the end of the previous item (last_block) and takes the first
+ * gap before the next item that is larger than num_blocks.  If the items
+ * run out first, the extent is placed at last_block, or at search_start
+ * when no item was seen at all.  The chosen range is written to *ins
+ * (objectid = first block, offset = num_blocks) and inserted into root as
+ * an extent_item with refs = 1 and the given owner.
+ *
+ * Returns the result of insert_item(), or -1 if the search produced no
+ * leaf to scan.
+ *
+ * NOTE(review): search_end only feeds hole_size, which is not checked on
+ * the end-of-tree path, so nothing here reports running out of space yet.
+ */
+int alloc_extent(struct ctree_root *root, u64 num_blocks, u64 search_start,
+                u64 search_end, u64 owner, struct key *ins)
+{
+       struct ctree_path path;
+       struct key *key;
+       int ret;
+       u64 hole_size = 0;
+       int slot = 0;
+       u64 last_block = 0;
+       int start_found = 0;
+       struct leaf *l;
+       struct extent_item extent_item;
+
+       init_path(&path);
+       ins->objectid = search_start;
+       ins->offset = 0;
+       ins->flags = 0;
+
+       ret = search_slot(root, ins, &path);
+       while (1) {
+               if (!path.nodes[0]) {
+                       // FIXME allocate root
+                       /* no leaf to look at; fail instead of dereferencing it */
+                       return -1;
+               }
+               l = &path.nodes[0]->leaf;
+               slot = path.slots[0];
+               if (slot >= l->header.nritems) {
+                       ret = next_leaf(root, &path);
+                       if (ret == 0)
+                               continue;
+                       /*
+                        * Ran off the end of the tree: allocate right after
+                        * the last item seen, or at search_start if there
+                        * was none.
+                        */
+                       ins->objectid = start_found ? last_block : search_start;
+                       ins->offset = num_blocks;
+                       hole_size = search_end - ins->objectid;
+                       goto insert;
+               }
+               key = &l->items[slot].key;
+               if (start_found) {
+                       hole_size = key->objectid - last_block;
+                       if (hole_size > num_blocks) {
+                               ins->objectid = last_block;
+                               ins->offset = num_blocks;
+                               goto insert;
+                       }
+               } else
+                       start_found = 1;
+               last_block = key->objectid + key->offset;
+               path.slots[0]++;
+               /* cast so the format is right whatever u64 is typedef'ed to */
+               printf("last block is now %llu\n",
+                      (unsigned long long)last_block);
+       }
+       // FIXME -ENOSPC
+insert:
+       /* insert_item() walks the tree with its own path, so drop ours first */
+       release_path(root, &path);
+       extent_item.refs = 1;
+       extent_item.owner = owner;
+       ret = insert_item(root, ins, &extent_item, sizeof(extent_item));
+       return ret;
+}
+
+/*
+ * Top up the block reservations of root.
+ *
+ * Nothing is done while the active extent still has more than min_blocks
+ * (MAX_LEVEL * 2) unused blocks, or while the reserve extent still has
+ * unused blocks.  Otherwise a new extent of min_blocks * 2 blocks is
+ * allocated from root->extent_root and becomes the reserve extent.
+ *
+ * Returns 0 when no refill was needed or the refill succeeded, otherwise
+ * the error from alloc_extent().  On error the reserve extent is left
+ * unchanged, so blocks that were never recorded in the extent tree are not
+ * handed out.
+ */
+static int refill_alloc_extent(struct ctree_root *root)
+{
+       struct alloc_extent *ae = root->alloc_extent;
+       struct key key;
+       int ret;
+       int min_blocks = MAX_LEVEL * 2;
+
+       /* casts keep the format valid whatever u64 is typedef'ed to */
+       printf("refill alloc root %p, numused %llu total %llu\n",
+              (void *)root, (unsigned long long)ae->num_used,
+              (unsigned long long)ae->num_blocks);
+       if (ae->num_blocks > ae->num_used && ae->num_blocks - ae->num_used >
+           min_blocks)
+               return 0;
+       ae = root->reserve_extent;
+       if (ae->num_blocks > ae->num_used) {
+               if (root->alloc_extent->num_blocks == 0) {
+                       /* we should swap reserve/alloc_extent when alloc
+                        * fills up
+                        */
+                       BUG();
+               }
+               if (ae->num_blocks - ae->num_used < min_blocks)
+                       BUG();
+               return 0;
+       }
+       // FIXME, this recurses
+       /* (u64)-1: search_end is a u64, unsigned long may be narrower */
+       ret = alloc_extent(root->extent_root,
+                          min_blocks * 2, 0, (u64)-1, 0, &key);
+       if (ret)
+               return ret;
+       ae->blocknr = key.objectid;
+       ae->num_blocks = key.offset;
+       ae->num_used = 0;
+       return 0;
+}
+
 void print_leaf(struct leaf *l)
 {
        int i;
@@ -948,8 +1080,8 @@ void print_tree(struct ctree_root *root, struct tree_buffer *t)
 
 /* for testing only */
 int next_key(int i, int max_key) {
-       return rand() % max_key;
-       // return i;
+       // return rand() % max_key;
+       return i; /* sequential keys; max_key is unused while the rand() variant is commented out */
 }
 
 int main() {
@@ -960,7 +1092,7 @@ int main() {
        int i;
        int num;
        int ret;
-       int run_size = 25000;
+       int run_size = 256;
        int max_key = 100000000;
        int tree_size = 0;
        struct ctree_path path;
@@ -980,10 +1112,20 @@ int main() {
                ins.objectid = num;
                ins.offset = 0;
                ins.flags = 0;
+               printf("insert %d\n", i);
                ret = insert_item(root, &ins, buf, strlen(buf));
                if (!ret)
                        tree_size++;
+               printf("done insert %d\n", i);
        }
+       printf("root used: %lu\n", root->alloc_extent->num_used);
+       printf("root tree\n");
+       print_tree(root, root->node);
+       printf("map tree\n");
+       printf("map used: %lu\n", root->extent_root->alloc_extent->num_used);
+       print_tree(root->extent_root, root->extent_root->node);
+       exit(1);
+
        close_ctree(root);
        root = open_ctree("dbfile");
        printf("starting search\n");
diff --git a/ctree.h b/ctree.h
index 586bf1866042e5eb804ef19a88d631df634b7d52..b737925be3142bd9fcd5722879da4fcd87be07bd 100644 (file)
--- a/ctree.h
+++ b/ctree.h
@@ -1,7 +1,7 @@
 #ifndef __CTREE__
 #define __CTREE__
 
-#define CTREE_BLOCKSIZE 4096
+#define CTREE_BLOCKSIZE 256
 
 struct key {
        u64 objectid;
@@ -22,18 +22,41 @@ struct header {
 #define NODEPTRS_PER_BLOCK ((CTREE_BLOCKSIZE - sizeof(struct header)) / \
                            (sizeof(struct key) + sizeof(u64)))
 
-#define LEVEL_BITS 3
-#define MAX_LEVEL (1 << LEVEL_BITS)
+#define MAX_LEVEL 8
 #define node_level(f) ((f) & (MAX_LEVEL-1))
 #define is_leaf(f) (node_level(f) == 0)
 
 struct tree_buffer;
+
+struct alloc_extent { /* a contiguous run of blocks that get_free_block() hands out one at a time */
+       u64 blocknr; /* first block of the run */
+       u64 num_blocks; /* length of the run in blocks; set to 0 when the run is swapped out */
+       u64 num_used; /* blocks already handed out, counted from blocknr */
+} __attribute__ ((__packed__));
+
 struct ctree_root {
        struct tree_buffer *node;
+       struct ctree_root *extent_root; /* tree that alloc_extent() inserts extent items into */
+       struct alloc_extent *alloc_extent; /* extent get_free_block() currently allocates from */
+       struct alloc_extent *reserve_extent; /* swapped in by get_free_block() when alloc_extent fills up */
        int fp;
        struct radix_tree_root cache_radix;
+       struct alloc_extent ai1; /* storage that alloc_extent initially points at (see __setup_root) */
+       struct alloc_extent ai2; /* storage that reserve_extent initially points at */
 };
 
+struct ctree_root_info { /* per-tree record inside struct ctree_super_block */
+       u64 fsid[2]; /* FS specific uuid */
+       u64 blocknr; /* blocknr of this block */
+       u64 objectid; /* inode number of this root */
+       u64 tree_root; /* block number of the tree's root node; __setup_root() reads it with read_tree_block() */
+       u32 csum; /* NOTE(review): not read or written in the code shown here */
+       u32 ham; /* NOTE(review): not read or written in the code shown here */
+       struct alloc_extent alloc_extent; /* copied into ctree_root.ai1 by __setup_root() */
+       struct alloc_extent reserve_extent; /* copied into ctree_root.ai2 by __setup_root() */
+       u64 snapuuid[2]; /* root specific uuid */
+} __attribute__ ((__packed__));
+
 struct item {
        struct key key;
        u16 offset;
@@ -55,6 +78,11 @@ struct node {
        u64 blockptrs[NODEPTRS_PER_BLOCK];
 } __attribute__ ((__packed__));
 
+struct extent_item { /* item data that alloc_extent() inserts for each allocated extent */
+       u32 refs; /* set to 1 by alloc_extent(); presumably a reference count - confirm */
+       u64 owner; /* owner value passed in by the caller of alloc_extent() */
+} __attribute__ ((__packed__));
+
 struct ctree_path {
        struct tree_buffer *nodes[MAX_LEVEL];
        int slots[MAX_LEVEL];
index 8d51a07051d94dd5fbe5b30953c38bdbb7f14c35..653f18aab330adc8ae5f138f019d9db0ac884d79 100644 (file)
--- a/disk-io.c
+++ b/disk-io.c
 
 static int allocated_blocks = 0;
 
-struct ctree_header {
-       u64 root_block;
-} __attribute__ ((__packed__));
-
 static int get_free_block(struct ctree_root *root, u64 *block)
 {
        struct stat st;
        int ret;
 
+       if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) /* active extent has nothing left */
+               return -1;
+
+       *block = root->alloc_extent->blocknr + root->alloc_extent->num_used; /* next unused block of the extent */
+       root->alloc_extent->num_used += 1;
+       if (root->alloc_extent->num_used >= root->alloc_extent->num_blocks) { /* that was the last block: switch to the reserve */
+               struct alloc_extent *ae = root->alloc_extent;
+               root->alloc_extent = root->reserve_extent;
+               root->reserve_extent = ae;
+               ae->num_blocks = 0; /* marks the used-up extent as empty so refill_alloc_extent() replaces it */
+       }
        st.st_size = 0;
        ret = fstat(root->fp, &st);
-       if (st.st_size > sizeof(struct ctree_header)) {
-               *block = (st.st_size -
-                       sizeof(struct ctree_header)) / CTREE_BLOCKSIZE;
-       } else {
-               *block = 0;
-       }
-       ret = ftruncate(root->fp, sizeof(struct ctree_header) + (*block + 1) *
-                       CTREE_BLOCKSIZE);
+       if (st.st_size < (*block + 1) * CTREE_BLOCKSIZE) /* grow the file only when the block lies past its end */
+               ret = ftruncate(root->fp,
+                               (*block + 1) * CTREE_BLOCKSIZE);
        return ret;
 }
 
@@ -72,7 +74,7 @@ struct tree_buffer *alloc_free_block(struct ctree_root *root)
 
 struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr)
 {
-       loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header);
+       loff_t offset = blocknr * CTREE_BLOCKSIZE;
        struct tree_buffer *buf;
        int ret;
 
@@ -101,7 +103,7 @@ struct tree_buffer *read_tree_block(struct ctree_root *root, u64 blocknr)
 int write_tree_block(struct ctree_root *root, struct tree_buffer *buf)
 {
        u64 blocknr = buf->blocknr;
-       loff_t offset = blocknr * CTREE_BLOCKSIZE + sizeof(struct ctree_header);
+       loff_t offset = blocknr * CTREE_BLOCKSIZE;
        int ret;
 
        if (buf->blocknr != buf->node.header.blocknr)
@@ -114,11 +116,32 @@ int write_tree_block(struct ctree_root *root, struct tree_buffer *buf)
        return 0;
 }
 
+struct ctree_super_block { /* read by open_ctree() from CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE) */
+       struct ctree_root_info root_info; /* used to set up the main tree root */
+       struct ctree_root_info extent_info; /* used to set up the extent tree root */
+} __attribute__ ((__packed__));
+
+/*
+ * Initialise an in-memory root from its on-disk ctree_root_info.
+ *
+ * Records the file descriptor and the extent root, sets up the block
+ * cache, reads the tree's root node, and copies the two allocation extents
+ * from info into root->ai1 / root->ai2, which alloc_extent and
+ * reserve_extent are then pointed at.
+ *
+ * Always returns 0.
+ */
+static int __setup_root(struct ctree_root *root, struct ctree_root *extent_root,
+                       struct ctree_root_info *info, int fp)
+{
+       root->fp = fp;
+       /*
+        * open_ctree() passes roots straight from malloc(), so the cache
+        * must be initialised before the first block read.
+        * NOTE(review): assumes read_tree_block() consults cache_radix, as
+        * tree_block_release() does - confirm.
+        */
+       INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL);
+       root->node = read_tree_block(root, info->tree_root);
+       root->extent_root = extent_root;
+       memcpy(&root->ai1, &info->alloc_extent, sizeof(info->alloc_extent));
+       memcpy(&root->ai2, &info->reserve_extent, sizeof(info->reserve_extent));
+       root->alloc_extent = &root->ai1;
+       root->reserve_extent = &root->ai2;
+       /* cast so the format is right whatever u64 is typedef'ed to */
+       printf("setup done reading root %p, used %llu\n", (void *)root,
+              (unsigned long long)root->alloc_extent->num_used);
+       return 0;
+}
+
 struct ctree_root *open_ctree(char *filename)
 {
        struct ctree_root *root = malloc(sizeof(struct ctree_root));
+       struct ctree_root *extent_root = malloc(sizeof(struct ctree_root));
+       struct ctree_super_block super;
        int fp;
-       u64 root_block;
        int ret;
 
        fp = open(filename, O_CREAT | O_RDWR);
@@ -126,14 +149,20 @@ struct ctree_root *open_ctree(char *filename)
                free(root);
                return NULL;
        }
-       root->fp = fp;
-       INIT_RADIX_TREE(&root->cache_radix, GFP_KERNEL);
-       ret = pread(fp, &root_block, sizeof(u64), 0);
-       if (ret == sizeof(u64)) {
-               printf("reading root node at block %lu\n", root_block);
-               root->node = read_tree_block(root, root_block);
-       } else
-               root->node = NULL;
+       ret = pread(fp, &super, sizeof(struct ctree_super_block),
+                    CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE));
+       if (ret == 0) {
+               ret = mkfs(fp);
+               if (ret)
+                       return NULL;
+               ret = pread(fp, &super, sizeof(struct ctree_super_block),
+                            CTREE_SUPER_INFO_OFFSET(CTREE_BLOCKSIZE));
+               if (ret != sizeof(struct ctree_super_block))
+                       return NULL;
+       }
+       BUG_ON(ret < 0);
+       __setup_root(root, extent_root, &super.root_info, fp);
+       __setup_root(extent_root, extent_root, &super.extent_info, fp);
        return root;
 }
 
@@ -160,6 +189,7 @@ int update_root_block(struct ctree_root *root)
 
 void tree_block_release(struct ctree_root *root, struct tree_buffer *buf)
 {
+       return;
        buf->count--;
        if (buf->count == 0) {
                if (!radix_tree_lookup(&root->cache_radix, buf->blocknr))
index ee95fa05c4a3e1fcf79928fcc7f6951086223b30..41acb65eee19b42774a101a31587fcb32c6ff1ee 100644 (file)
--- a/disk-io.h
+++ b/disk-io.h
@@ -17,5 +17,8 @@ int close_ctree(struct ctree_root *root);
 void tree_block_release(struct ctree_root *root, struct tree_buffer *buf);
 struct tree_buffer *alloc_free_block(struct ctree_root *root);
 int update_root_block(struct ctree_root *root);
+int mkfs(int fd); /* open_ctree() calls this when the super block read returns 0 bytes; nonzero is treated as failure */
+
+#define CTREE_SUPER_INFO_OFFSET(bs) (16 * (bs)) /* file offset open_ctree() reads struct ctree_super_block from: 16 blocks of size bs */
 
 #endif