Btrfs: Add data=ordered support
author Chris Mason <chris.mason@oracle.com>
Tue, 8 Jan 2008 20:46:30 +0000 (15:46 -0500)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:03:59 +0000 (11:03 -0400)
This forces file data extents down to the disk along with the metadata that
references them.  The current implementation is fairly simple, and just
writes out all of the dirty pages in an inode before the commit.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
13 files changed:
fs/btrfs/Makefile
fs/btrfs/btrfs_inode.h
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/disk-io.h
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/inode.c
fs/btrfs/ordered-data.c [new file with mode: 0644]
fs/btrfs/ordered-data.h [new file with mode: 0644]
fs/btrfs/transaction.c
fs/btrfs/transaction.h

index d5804c5..ab9a9f8 100644 (file)
@@ -5,7 +5,7 @@ obj-m  := btrfs.o
 btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
           hash.o file-item.o inode-item.o inode-map.o disk-io.o \
           transaction.o bit-radix.o inode.o file.o tree-defrag.o \
-          extent_map.o sysfs.o struct-funcs.o xattr.o acl.o
+          extent_map.o sysfs.o struct-funcs.o xattr.o acl.o ordered-data.o
 
 #btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
 #        root-tree.o dir-item.o hash.o file-item.o inode-item.o \
index d1d5af4..f27e633 100644 (file)
@@ -29,6 +29,7 @@ struct btrfs_inode {
        struct extent_map_tree extent_tree;
        struct inode vfs_inode;
 
+       u64 ordered_trans;
        /*
         * transid of the trans_handle that last modified this inode
         */
index 35c5707..43d2314 100644 (file)
@@ -221,7 +221,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
                    struct extent_buffer **cow_ret)
 {
        u64 search_start;
+       u64 header_trans;
        int ret;
+
        if (trans->transaction != root->fs_info->running_transaction) {
                printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
                       root->fs_info->running_transaction->transid);
@@ -232,7 +234,9 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
                       root->fs_info->generation);
                WARN_ON(1);
        }
-       if (btrfs_header_generation(buf) == trans->transid) {
+
+       header_trans = btrfs_header_generation(buf);
+       if (header_trans == trans->transid) {
                *cow_ret = buf;
                return 0;
        }
index 9873975..b55dba5 100644 (file)
@@ -16,8 +16,8 @@
  * Boston, MA 021110-1307, USA.
  */
 
-#ifndef __BTRFS__
-#define __BTRFS__
+#ifndef __BTRFS_CTREE__
+#define __BTRFS_CTREE__
 
 #include <linux/version.h>
 #include <linux/mm.h>
@@ -363,7 +363,6 @@ struct btrfs_root {
        struct inode *inode;
        struct kobject root_kobj;
        struct completion kobj_unregister;
-       struct rw_semaphore snap_sem;
        u64 objectid;
        u64 last_trans;
 
@@ -1142,6 +1141,8 @@ void btrfs_destroy_cachep(void);
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
                                struct btrfs_root *root);
+struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
+                           u64 root_objectid);
 int btrfs_commit_write(struct file *file, struct page *page,
                       unsigned from, unsigned to);
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
index a6170ff..34cf1f1 100644 (file)
@@ -406,7 +406,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
        memset(&root->root_kobj, 0, sizeof(root->root_kobj));
        init_completion(&root->kobj_unregister);
-       init_rwsem(&root->snap_sem);
        root->defrag_running = 0;
        root->defrag_level = 0;
        root->root_key.objectid = objectid;
@@ -498,6 +497,21 @@ insert:
        return root;
 }
 
+/*
+ * Map a root objectid to its struct btrfs_root.  The tree root and the
+ * extent root come straight from fs_info; any other objectid is looked
+ * up in fs_info->fs_roots_radix, and the result of that lookup is
+ * returned as is.
+ */
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_objectid)
+{
+       struct btrfs_root *found;
+
+       if (root_objectid == BTRFS_ROOT_TREE_OBJECTID)
+               found = fs_info->tree_root;
+       else if (root_objectid == BTRFS_EXTENT_TREE_OBJECTID)
+               found = fs_info->extent_root;
+       else
+               found = radix_tree_lookup(&fs_info->fs_roots_radix,
+                                         (unsigned long)root_objectid);
+       return found;
+}
+
 struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
                                              struct btrfs_key *location)
 {
index 8c3cfd0..dae9fba 100644 (file)
@@ -34,6 +34,8 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root);
 struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
                                            u64 bytenr, u32 blocksize);
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                       u64 root_objectid);
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
                                      struct btrfs_key *location,
                                      const char *name, int namelen);
index c906bb1..68137cd 100644 (file)
@@ -1195,7 +1195,9 @@ static int pin_down_bytes(struct btrfs_root *root, u64 bytenr, u32 num_bytes,
                        if (btrfs_buffer_uptodate(buf)) {
                                u64 transid =
                                    root->fs_info->running_transaction->transid;
-                               if (btrfs_header_generation(buf) == transid) {
+                               u64 header_transid =
+                                       btrfs_header_generation(buf);
+                               if (header_transid == transid) {
                                        free_extent_buffer(buf);
                                        return 1;
                                }
index 94c9337..0a5f4de 100644 (file)
@@ -34,6 +34,7 @@
 #include "disk-io.h"
 #include "transaction.h"
 #include "btrfs_inode.h"
+#include "ordered-data.h"
 #include "ioctl.h"
 #include "print-tree.h"
 
@@ -329,6 +330,7 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
                root->fs_info->delalloc_bytes += (end_of_last_block + 1 -
                                          start_pos) - existing_delalloc;
                spin_unlock(&root->fs_info->delalloc_lock);
+               btrfs_add_ordered_inode(inode);
        } else {
                u64 aligned_end;
                /* step one, delete the existing extents in this range */
@@ -724,8 +726,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
 
        pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
 
-       down_read(&BTRFS_I(inode)->root->snap_sem);
-
        mutex_lock(&inode->i_mutex);
        first_index = pos >> PAGE_CACHE_SHIFT;
        last_index = (pos + count) >> PAGE_CACHE_SHIFT;
@@ -804,7 +804,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
        }
 out:
        mutex_unlock(&inode->i_mutex);
-       up_read(&BTRFS_I(inode)->root->snap_sem);
 
 out_nolock:
        kfree(pages);
index 6d0cd9a..6d6e1ac 100644 (file)
@@ -135,6 +135,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
                alloc_hint = ins.objectid + ins.offset;
                start += cur_alloc_size;
        }
+       btrfs_add_ordered_inode(inode);
 out:
        btrfs_end_transaction(trans, root);
        return ret;
@@ -367,8 +368,8 @@ void btrfs_read_locked_inode(struct inode *inode)
        path = btrfs_alloc_path();
        BUG_ON(!path);
        mutex_lock(&root->fs_info->fs_mutex);
-
        memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
+
        ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
        if (ret)
                goto make_bad;
@@ -898,7 +899,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
        if ((offset & (blocksize - 1)) == 0)
                goto out;
 
-       down_read(&root->snap_sem);
        ret = -ENOMEM;
        page = grab_cache_page(mapping, index);
        if (!page)
@@ -917,7 +917,6 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 
        unlock_page(page);
        page_cache_release(page);
-       up_read(&BTRFS_I(inode)->root->snap_sem);
 out:
        return ret;
 }
@@ -1146,6 +1145,19 @@ static int btrfs_find_actor(struct inode *inode, void *opaque)
                args->root == BTRFS_I(inode)->root);
 }
 
+/*
+ * Look up the inode identified by (root_objectid, objectid) on super
+ * block @s via ilookup5().  Returns NULL when btrfs_lookup_fs_root()
+ * finds no root for @root_objectid; otherwise returns whatever
+ * ilookup5() returns.
+ */
+struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
+                           u64 root_objectid)
+{
+       struct btrfs_iget_args lookup_args;
+       struct btrfs_root *found_root;
+
+       found_root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
+       if (!found_root)
+               return NULL;
+
+       lookup_args.ino = objectid;
+       lookup_args.root = found_root;
+       return ilookup5(s, objectid, btrfs_find_actor, (void *)&lookup_args);
+}
+
 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
                                struct btrfs_root *root)
 {
@@ -1336,7 +1348,6 @@ read_dir_items:
 
                        d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
                        btrfs_dir_item_key_to_cpu(leaf, di, &location);
-
                        over = filldir(dirent, name_ptr, name_len,
                                       found_key.offset,
                                       location.objectid,
@@ -2054,7 +2065,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 
        ret = -EINVAL;
 
-       down_read(&BTRFS_I(inode)->root->snap_sem);
        lock_page(page);
        wait_on_page_writeback(page);
        size = i_size_read(inode);
@@ -2075,7 +2085,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
        ret = btrfs_cow_one_page(inode, page, end);
 
 out_unlock:
-       up_read(&BTRFS_I(inode)->root->snap_sem);
        unlock_page(page);
 out:
        return ret;
@@ -2118,7 +2127,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name,
        struct btrfs_root_item root_item;
        struct btrfs_inode_item *inode_item;
        struct extent_buffer *leaf;
-       struct btrfs_root *new_root;
+       struct btrfs_root *new_root = root;
        struct inode *inode;
        struct inode *dir;
        int ret;
@@ -2230,7 +2239,7 @@ static int noinline create_subvol(struct btrfs_root *root, char *name,
                goto fail;
 fail:
        nr = trans->blocks_used;
-       err = btrfs_commit_transaction(trans, root);
+       err = btrfs_commit_transaction(trans, new_root);
        if (err && !ret)
                ret = err;
 fail_commit:
@@ -2253,10 +2262,6 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
        if (!root->ref_cows)
                return -EINVAL;
 
-       down_write(&root->snap_sem);
-       freeze_bdev(root->fs_info->sb->s_bdev);
-       thaw_bdev(root->fs_info->sb->s_bdev, root->fs_info->sb);
-
        mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_check_free_space(root, 1, 0);
        if (ret)
@@ -2264,6 +2269,9 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
 
        trans = btrfs_start_transaction(root, 1);
        BUG_ON(!trans);
+       err = btrfs_commit_transaction(trans, root);
+
+       trans = btrfs_start_transaction(root, 1);
 
        ret = btrfs_update_inode(trans, root, root->inode);
        if (ret)
@@ -2272,9 +2280,7 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
        ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
                                       0, &objectid);
        if (ret)
-               goto fail;
-
-       memcpy(&new_root_item, &root->root_item,
+               goto fail; memcpy(&new_root_item, &root->root_item,
               sizeof(new_root_item));
 
        key.objectid = objectid;
@@ -2285,12 +2291,20 @@ static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
        btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
        free_extent_buffer(tmp);
 
+       /* write the ordered inodes to force all delayed allocations to
+        * be filled.  Once this is done, we can copy the root
+        */
+       mutex_lock(&root->fs_info->trans_mutex);
+       btrfs_write_ordered_inodes(trans, root);
+       mutex_unlock(&root->fs_info->trans_mutex);
+
        btrfs_copy_root(trans, root, root->node, &tmp, objectid);
 
        btrfs_set_root_bytenr(&new_root_item, tmp->start);
        btrfs_set_root_level(&new_root_item, btrfs_header_level(tmp));
        ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
                                &new_root_item);
+printk("new root %Lu node %Lu\n", objectid, tmp->start);
        free_extent_buffer(tmp);
        if (ret)
                goto fail;
@@ -2321,7 +2335,6 @@ fail:
                ret = err;
 fail_unlock:
        mutex_unlock(&root->fs_info->fs_mutex);
-       up_write(&root->snap_sem);
        btrfs_btree_balance_dirty(root, nr);
        return ret;
 }
@@ -2608,6 +2621,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        if (!ei)
                return NULL;
        ei->last_trans = 0;
+       ei->ordered_trans = 0;
        return &ei->vfs_inode;
 }
 
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
new file mode 100644 (file)
index 0000000..411aba8
--- /dev/null
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "btrfs_inode.h"
+
+/*
+ * One node of the ordered inode rb tree.  Entries are keyed by
+ * (root_objectid, objectid) and ordered by comp_entry().
+ */
+struct tree_entry {
+       u64 root_objectid;      /* compared first */
+       u64 objectid;           /* compared second; set from inode->i_ino */
+       struct rb_node rb_node; /* linkage into the rb tree */
+};
+
+/*
+ * Compare the key (root_objectid, objectid) against @entry.
+ *
+ * returns > 0 if the key passed in is greater than the entry's key,
+ * < 0 if it is smaller and zero if they are equal.  root_objectid is
+ * compared first, objectid only breaks ties.
+ */
+static int comp_entry(struct tree_entry *entry, u64 root_objectid,
+                     u64 objectid)
+{
+       if (root_objectid != entry->root_objectid)
+               return root_objectid < entry->root_objectid ? -1 : 1;
+       if (objectid != entry->objectid)
+               return objectid < entry->objectid ? -1 : 1;
+       return 0;
+}
+
+/*
+ * Link @node into @root at the position for (root_objectid, objectid).
+ *
+ * Returns NULL when @node was inserted.  If an entry with the same key
+ * is already present, that existing node is returned and nothing is
+ * inserted.
+ */
+static struct rb_node *tree_insert(struct rb_root *root, u64 root_objectid,
+                                  u64 objectid, struct rb_node *node)
+{
+       struct rb_node **link = &root->rb_node;
+       struct rb_node *parent = NULL;
+
+       while (*link) {
+               struct tree_entry *cur;
+               int cmp;
+
+               parent = *link;
+               cur = rb_entry(parent, struct tree_entry, rb_node);
+               cmp = comp_entry(cur, root_objectid, objectid);
+               if (cmp == 0)
+                       return parent;
+
+               /* smaller keys go left, larger keys go right */
+               link = cmp < 0 ? &parent->rb_left : &parent->rb_right;
+       }
+
+       rb_link_node(node, parent, link);
+       rb_insert_color(node, root);
+       return NULL;
+}
+
+/*
+ * Search @root for an exact (root_objectid, objectid) match.
+ *
+ * Returns the matching node, or NULL when there is none.  When there is
+ * no exact match and @prev_ret is not NULL, *prev_ret is set to the first
+ * node whose key is greater than the search key, or to NULL if no such
+ * node exists.  *prev_ret is not written when an exact match is found.
+ */
+static struct rb_node *__tree_search(struct rb_root *root, u64 root_objectid,
+                                    u64 objectid, struct rb_node **prev_ret)
+{
+       struct rb_node *n = root->rb_node;
+       struct rb_node *prev = NULL;
+       struct tree_entry *entry;
+       int comp;
+
+       while (n) {
+               entry = rb_entry(n, struct tree_entry, rb_node);
+               prev = n;
+               comp = comp_entry(entry, root_objectid, objectid);
+
+               if (comp < 0)
+                       n = n->rb_left;
+               else if (comp > 0)
+                       n = n->rb_right;
+               else
+                       return n;
+       }
+       if (!prev_ret)
+               return NULL;
+
+       /*
+        * prev is the last node visited on the way down.  Step forward
+        * until we reach a node that sorts after the search key.  The
+        * entry is only derived from a non-NULL node, so rb_entry() is
+        * never applied to the NULL that rb_next() returns at the end of
+        * the tree.
+        */
+       while (prev) {
+               entry = rb_entry(prev, struct tree_entry, rb_node);
+               if (comp_entry(entry, root_objectid, objectid) < 0)
+                       break;
+               prev = rb_next(prev);
+       }
+       *prev_ret = prev;
+       return NULL;
+}
+
+/*
+ * Return the node matching (root_objectid, objectid) exactly, or, when
+ * there is no exact match, whatever __tree_search() reported through its
+ * prev_ret argument.
+ */
+static inline struct rb_node *tree_search(struct rb_root *root,
+                                         u64 root_objectid, u64 objectid)
+{
+       struct rb_node *following;
+       struct rb_node *exact;
+
+       exact = __tree_search(root, root_objectid, objectid, &following);
+       return exact ? exact : following;
+}
+
+/*
+ * Record @inode in the ordered inode tree of the running transaction.
+ *
+ * Nothing is done when the inode's ordered_trans is already at or past
+ * the running transid, or when its (root, ino) key is already in the
+ * tree.  Returns 0 in all of those cases and after an insert, -ENOMEM
+ * when the tree entry cannot be allocated.
+ */
+int btrfs_add_ordered_inode(struct inode *inode)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       u64 root_objectid = root->root_key.objectid;
+       u64 transid = root->fs_info->running_transaction->transid;
+       struct btrfs_ordered_inode_tree *tree;
+       struct tree_entry *new_entry;
+       struct rb_node *existing;
+
+       if (BTRFS_I(inode)->ordered_trans >= transid)
+               return 0;
+
+       tree = &root->fs_info->running_transaction->ordered_inode_tree;
+
+       /* cheap check under the read lock before allocating anything */
+       read_lock(&tree->lock);
+       existing = __tree_search(&tree->tree, root_objectid, inode->i_ino,
+                                NULL);
+       read_unlock(&tree->lock);
+       if (existing)
+               return 0;
+
+       new_entry = kmalloc(sizeof(*new_entry), GFP_NOFS);
+       if (!new_entry)
+               return -ENOMEM;
+
+       write_lock(&tree->lock);
+       new_entry->objectid = inode->i_ino;
+       new_entry->root_objectid = root_objectid;
+
+       existing = tree_insert(&tree->tree, root_objectid,
+                              inode->i_ino, &new_entry->rb_node);
+
+       BTRFS_I(inode)->ordered_trans = transid;
+
+       write_unlock(&tree->lock);
+
+       /* the key showed up between the two lock sections; drop our copy */
+       if (existing)
+               kfree(new_entry);
+       return 0;
+}
+
+/*
+ * Find the first entry in @tree whose key is strictly greater than
+ * (*root_objectid, *objectid).
+ *
+ * Returns 1 and stores the entry's key through @root_objectid and
+ * @objectid when such an entry exists; returns 0 and leaves both
+ * untouched otherwise.  The tree is not modified here, so only the read
+ * side of the lock is taken, matching the lookup done in
+ * btrfs_add_ordered_inode().
+ */
+int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
+                                      u64 *root_objectid, u64 *objectid)
+{
+       struct tree_entry *entry;
+       struct rb_node *node;
+       int found = 0;
+
+       read_lock(&tree->lock);
+       node = tree_search(&tree->tree, *root_objectid, *objectid);
+       while (node) {
+               entry = rb_entry(node, struct tree_entry, rb_node);
+               /* stop at the first entry that sorts after the search key */
+               if (comp_entry(entry, *root_objectid, *objectid) < 0)
+                       break;
+               node = rb_next(node);
+       }
+       if (node) {
+               *root_objectid = entry->root_objectid;
+               *objectid = entry->objectid;
+               found = 1;
+       }
+       read_unlock(&tree->lock);
+       return found;
+}
+
+/*
+ * Find the first entry in @tree whose key is strictly greater than
+ * (*root_objectid, *objectid), remove it from the tree and free it.
+ *
+ * Returns 1 and stores the removed entry's key through @root_objectid
+ * and @objectid; returns 0 and leaves both untouched when there is no
+ * such entry.
+ */
+int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
+                                      u64 *root_objectid, u64 *objectid)
+{
+       struct tree_entry *victim = NULL;
+       struct rb_node *cursor;
+
+       write_lock(&tree->lock);
+       cursor = tree_search(&tree->tree, *root_objectid, *objectid);
+       while (cursor) {
+               struct tree_entry *cur;
+
+               cur = rb_entry(cursor, struct tree_entry, rb_node);
+               if (comp_entry(cur, *root_objectid, *objectid) < 0) {
+                       victim = cur;
+                       break;
+               }
+               cursor = rb_next(cursor);
+       }
+       if (victim) {
+               *root_objectid = victim->root_objectid;
+               *objectid = victim->objectid;
+               rb_erase(cursor, &tree->tree);
+       }
+       write_unlock(&tree->lock);
+
+       if (!victim)
+               return 0;
+
+       /* the entry is unlinked, so it can be freed outside the lock */
+       kfree(victim);
+       return 1;
+}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
new file mode 100644 (file)
index 0000000..aaf9eb1
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2007 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_ORDERED_DATA__
+#define __BTRFS_ORDERED_DATA__
+
+/*
+ * rb tree of (root objectid, inode objectid) keys together with the
+ * rwlock the helpers in ordered-data.c take around tree accesses.
+ */
+struct btrfs_ordered_inode_tree {
+       rwlock_t lock;          /* taken around every access to tree */
+       struct rb_root tree;    /* root of the rb tree, NULL rb_node when empty */
+};
+
+/* set up @t as an empty ordered inode tree with an initialized lock */
+static inline void
+btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
+{
+       t->tree.rb_node = NULL;
+       rwlock_init(&t->lock);
+}
+
+int btrfs_add_ordered_inode(struct inode *inode);
+int btrfs_find_del_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
+                                      u64 *root_objectid, u64 *objectid);
+int btrfs_find_first_ordered_inode(struct btrfs_ordered_inode_tree *tree,
+                                      u64 *root_objectid, u64 *objectid);
+#endif
index 02721ee..3ed5868 100644 (file)
@@ -67,6 +67,7 @@ static int join_transaction(struct btrfs_root *root)
                cur_trans->commit_done = 0;
                cur_trans->start_time = get_seconds();
                list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
+               btrfs_ordered_inode_tree_init(&cur_trans->ordered_inode_tree);
                extent_map_tree_init(&cur_trans->dirty_pages,
                                     root->fs_info->btree_inode->i_mapping,
                                     GFP_NOFS);
@@ -473,6 +474,60 @@ static int drop_dirty_roots(struct btrfs_root *tree_root,
        return ret;
 }
 
+/*
+ * Write out the data pages of every inode recorded in the ordered inode
+ * tree of @trans's transaction.  Always returns 0.
+ *
+ * The function unlocks and later retakes both fs_info->trans_mutex and
+ * fs_info->fs_mutex around each inode, so it must be entered with both
+ * held.
+ *
+ * NOTE(review): the two printk() calls have no log level and fire on
+ * every call; they look like debugging output -- confirm before keeping.
+ */
+int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root)
+{
+       struct btrfs_transaction *cur_trans = trans->transaction;
+       struct inode *inode;
+       u64 root_objectid = 0;
+       u64 objectid = 0;
+       u64 transid = trans->transid;
+       int ret;
+
+printk("write ordered trans %Lu\n", transid);
+       /*
+        * Pass one: walk the tree in key order without removing anything.
+        * (root_objectid, objectid) carries the last key seen, so each
+        * lookup returns the next entry.  Writeback is only started here
+        * (filemap_fdatawrite), not waited on.
+        */
+       while(1) {
+               ret = btrfs_find_first_ordered_inode(
+                               &cur_trans->ordered_inode_tree,
+                               &root_objectid, &objectid);
+               if (!ret)
+                       break;
+
+               /* drop both mutexes while looking up and writing the inode */
+               mutex_unlock(&root->fs_info->trans_mutex);
+               mutex_unlock(&root->fs_info->fs_mutex);
+               inode = btrfs_ilookup(root->fs_info->sb, objectid,
+                                     root_objectid);
+               if (inode) {
+                       if (S_ISREG(inode->i_mode))
+                               filemap_fdatawrite(inode->i_mapping);
+                       iput(inode);
+               }
+               /* retake in fs_mutex, then trans_mutex order */
+               mutex_lock(&root->fs_info->fs_mutex);
+               mutex_lock(&root->fs_info->trans_mutex);
+       }
+       /*
+        * Pass two: repeatedly remove the first entry from the tree (the
+        * search key is reset to (0, 0) every time) and write and wait on
+        * the inode's pages.  This also picks up entries added while the
+        * mutexes were dropped.
+        */
+       while(1) {
+               root_objectid = 0;
+               objectid = 0;
+               ret = btrfs_find_del_first_ordered_inode(
+                               &cur_trans->ordered_inode_tree,
+                               &root_objectid, &objectid);
+               if (!ret)
+                       break;
+               mutex_unlock(&root->fs_info->trans_mutex);
+               mutex_unlock(&root->fs_info->fs_mutex);
+               inode = btrfs_ilookup(root->fs_info->sb, objectid,
+                                     root_objectid);
+               if (inode) {
+                       if (S_ISREG(inode->i_mode))
+                               filemap_write_and_wait(inode->i_mapping);
+                       iput(inode);
+               }
+               mutex_lock(&root->fs_info->fs_mutex);
+               mutex_lock(&root->fs_info->trans_mutex);
+       }
+printk("done write ordered trans %Lu\n", transid);
+       return 0;
+}
+
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root)
 {
@@ -550,10 +605,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                mutex_lock(&root->fs_info->fs_mutex);
                mutex_lock(&root->fs_info->trans_mutex);
                finish_wait(&cur_trans->writer_wait, &wait);
+               ret = btrfs_write_ordered_inodes(trans, root);
+
        } while (cur_trans->num_writers > 1 ||
                 (cur_trans->num_joined != joined));
 
        WARN_ON(cur_trans != trans->transaction);
+
        ret = add_dirty_roots(trans, &root->fs_info->fs_roots_radix,
                              &dirty_fs_roots);
        BUG_ON(ret);
index eef840b..c157ddb 100644 (file)
  * Boston, MA 021110-1307, USA.
  */
 
-#ifndef __TRANSACTION__
-#define __TRANSACTION__
+#ifndef __BTRFS_TRANSACTION__
+#define __BTRFS_TRANSACTION__
 #include "btrfs_inode.h"
+#include "ordered-data.h"
 
 struct btrfs_transaction {
        u64 transid;
@@ -30,6 +31,7 @@ struct btrfs_transaction {
        struct list_head list;
        struct extent_map_tree dirty_pages;
        unsigned long start_time;
+       struct btrfs_ordered_inode_tree ordered_inode_tree;
        wait_queue_head_t writer_wait;
        wait_queue_head_t commit_wait;
 };
@@ -90,4 +92,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
 int btrfs_clean_old_snapshots(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
+int btrfs_write_ordered_inodes(struct btrfs_trans_handle *trans,
+                               struct btrfs_root *root);
 #endif