Btrfs: Fix nodatacow for the new data=ordered mode
author Yan Zheng <zheng.yan@oracle.com>
Tue, 5 Aug 2008 17:05:02 +0000 (13:05 -0400)
committer Chris Mason <chris.mason@oracle.com>
Thu, 25 Sep 2008 15:04:06 +0000 (11:04 -0400)
Signed-off-by: Chris Mason <chris.mason@oracle.com>
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/transaction.c

index 116aee2..f90e5a7 100644 (file)
@@ -1403,7 +1403,8 @@ static inline struct dentry *fdentry(struct file *file) {
 }
 
 /* extent-tree.c */
-int btrfs_cross_ref_exists(struct btrfs_root *root,
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
                           struct btrfs_key *key, u64 bytenr);
 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
                         struct btrfs_root *root);
index 33cb2ac..fff219e 100644 (file)
@@ -893,10 +893,10 @@ out:
        return ret;
 }
 
-int btrfs_cross_ref_exists(struct btrfs_root *root,
+int btrfs_cross_ref_exists(struct btrfs_trans_handle *trans,
+                          struct btrfs_root *root,
                           struct btrfs_key *key, u64 bytenr)
 {
-       struct btrfs_trans_handle *trans;
        struct btrfs_root *old_root;
        struct btrfs_path *path = NULL;
        struct extent_buffer *eb;
@@ -908,6 +908,7 @@ int btrfs_cross_ref_exists(struct btrfs_root *root,
        int level;
        int ret;
 
+       BUG_ON(trans == NULL);
        BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
        ret = get_reference_status(root, bytenr, 0, key->objectid,
                                   &min_generation, &ref_count);
@@ -917,7 +918,6 @@ int btrfs_cross_ref_exists(struct btrfs_root *root,
        if (ref_count != 1)
                return 1;
 
-       trans = btrfs_start_transaction(root, 0);
        old_root = root->dirty_root->root;
        ref_generation = old_root->root_key.offset;
 
@@ -973,7 +973,6 @@ int btrfs_cross_ref_exists(struct btrfs_root *root,
 out:
        if (path)
                btrfs_free_path(path);
-       btrfs_end_transaction(trans, root);
        return ret;
 }
 
@@ -3320,7 +3319,7 @@ again:
        mutex_unlock(&root->fs_info->alloc_mutex);
 
        btrfs_start_delalloc_inodes(root);
-       btrfs_wait_ordered_extents(tree_root);
+       btrfs_wait_ordered_extents(tree_root, 0);
 
        mutex_lock(&root->fs_info->alloc_mutex);
 
@@ -3407,7 +3406,7 @@ next:
                btrfs_clean_old_snapshots(tree_root);
 
                btrfs_start_delalloc_inodes(root);
-               btrfs_wait_ordered_extents(tree_root);
+               btrfs_wait_ordered_extents(tree_root, 0);
 
                trans = btrfs_start_transaction(tree_root, 1);
                btrfs_commit_transaction(trans, tree_root);
index 4d8ffc0..c33053b 100644 (file)
@@ -166,7 +166,7 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
 
                cur_alloc_size = ins.offset;
                ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
-                                              ins.offset);
+                                              ins.offset, 0);
                BUG_ON(ret);
                if (num_bytes < cur_alloc_size) {
                        printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
@@ -187,31 +187,32 @@ static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
        u64 extent_start;
        u64 extent_end;
        u64 bytenr;
-       u64 cow_end;
        u64 loops = 0;
        u64 total_fs_bytes;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_block_group_cache *block_group;
+       struct btrfs_trans_handle *trans;
        struct extent_buffer *leaf;
        int found_type;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *item;
        int ret;
-       int err;
+       int err = 0;
        struct btrfs_key found_key;
 
        total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
        path = btrfs_alloc_path();
        BUG_ON(!path);
+       trans = btrfs_join_transaction(root, 1);
+       BUG_ON(!trans);
 again:
        ret = btrfs_lookup_file_extent(NULL, root, path,
                                       inode->i_ino, start, 0);
        if (ret < 0) {
-               btrfs_free_path(path);
-               return ret;
+               err = ret;
+               goto out;
        }
 
-       cow_end = end;
        if (ret != 0) {
                if (path->slots[0] == 0)
                        goto not_found;
@@ -244,12 +245,11 @@ again:
                if (start < extent_start || start >= extent_end)
                        goto not_found;
 
-               cow_end = min(end, extent_end - 1);
                bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                if (bytenr == 0)
                        goto not_found;
 
-               if (btrfs_cross_ref_exists(root, &found_key, bytenr))
+               if (btrfs_cross_ref_exists(trans, root, &found_key, bytenr))
                        goto not_found;
                /*
                 * we may be called by the resizer, make sure we're inside
@@ -260,24 +260,32 @@ again:
                if (!block_group || block_group->ro)
                        goto not_found;
 
+               bytenr += btrfs_file_extent_offset(leaf, item);
+               extent_num_bytes = min(end + 1, extent_end) - start;
+               ret = btrfs_add_ordered_extent(inode, start, bytenr,
+                                               extent_num_bytes, 1);
+               if (ret) {
+                       err = ret;
+                       goto out;
+               }
+
+               btrfs_release_path(root, path);
                start = extent_end;
+               if (start <= end) {
+                       loops++;
+                       goto again;
+               }
        } else {
-               goto not_found;
-       }
-loop:
-       if (start > end) {
+not_found:
+               btrfs_end_transaction(trans, root);
                btrfs_free_path(path);
-               return 0;
+               return cow_file_range(inode, start, end);
        }
-       btrfs_release_path(root, path);
-       loops++;
-       goto again;
-
-not_found:
-       btrfs_release_path(root, path);
-       cow_file_range(inode, start, end);
-       start = end + 1;
-       goto loop;
+out:
+       WARN_ON(err);
+       btrfs_end_transaction(trans, root);
+       btrfs_free_path(path);
+       return err;
 }
 
 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
@@ -385,6 +393,11 @@ int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
                goto mapit;
        }
 
+       if (btrfs_test_opt(root, NODATASUM) ||
+           btrfs_test_flag(inode, NODATASUM)) {
+               goto mapit;
+       }
+
        return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
                                   inode, rw, bio, mirror_num,
                                   __btrfs_submit_bio_hook);
@@ -527,6 +540,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 
        ordered_extent = btrfs_lookup_ordered_extent(inode, start);
        BUG_ON(!ordered_extent);
+       if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
+               goto nocow;
 
        lock_extent(io_tree, ordered_extent->file_offset,
                    ordered_extent->file_offset + ordered_extent->len - 1,
@@ -567,6 +582,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
        unlock_extent(io_tree, ordered_extent->file_offset,
                    ordered_extent->file_offset + ordered_extent->len - 1,
                    GFP_NOFS);
+nocow:
        add_pending_csums(trans, inode, ordered_extent->file_offset,
                          &ordered_extent->list);
 
index 3932c7c..59b64c7 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/bit_spinlock.h>
 #include <linux/version.h>
 #include <linux/xattr.h>
+#include <linux/vmalloc.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
index db200e6..da6d43e 100644 (file)
@@ -152,7 +152,7 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
  * inserted.
  */
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-                            u64 start, u64 len)
+                            u64 start, u64 len, int nocow)
 {
        struct btrfs_ordered_inode_tree *tree;
        struct rb_node *node;
@@ -168,6 +168,8 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        entry->start = start;
        entry->len = len;
        entry->inode = inode;
+       if (nocow)
+               set_bit(BTRFS_ORDERED_NOCOW, &entry->flags);
 
        /* one ref for the tree */
        atomic_set(&entry->refs, 1);
@@ -303,10 +305,11 @@ int btrfs_remove_ordered_extent(struct inode *inode,
        return 0;
 }
 
-int btrfs_wait_ordered_extents(struct btrfs_root *root)
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
 {
        struct list_head splice;
        struct list_head *cur;
+       struct list_head *tmp;
        struct btrfs_ordered_extent *ordered;
        struct inode *inode;
 
@@ -314,10 +317,16 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root)
 
        spin_lock(&root->fs_info->ordered_extent_lock);
        list_splice_init(&root->fs_info->ordered_extents, &splice);
-       while(!list_empty(&splice)) {
+       list_for_each_safe(cur, tmp, &splice) {
                cur = splice.next;
                ordered = list_entry(cur, struct btrfs_ordered_extent,
                                     root_extent_list);
+               if (nocow_only &&
+                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
+                       cond_resched_lock(&root->fs_info->ordered_extent_lock);
+                       continue;
+               }
+
                list_del_init(&ordered->root_extent_list);
                atomic_inc(&ordered->refs);
                inode = ordered->inode;
@@ -338,6 +347,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root)
 
                spin_lock(&root->fs_info->ordered_extent_lock);
        }
+       list_splice_init(&splice, &root->fs_info->ordered_extents);
        spin_unlock(&root->fs_info->ordered_extent_lock);
        return 0;
 }
index 5efe6b6..fd45519 100644 (file)
@@ -64,6 +64,8 @@ struct btrfs_ordered_sum {
 
 #define BTRFS_ORDERED_COMPLETE 1 /* set when removed from the tree */
 
+#define BTRFS_ORDERED_NOCOW 2 /* set when we want to write in place */
+
 struct btrfs_ordered_extent {
        /* logical offset in the file */
        u64 file_offset;
@@ -125,7 +127,7 @@ int btrfs_remove_ordered_extent(struct inode *inode,
 int btrfs_dec_test_ordered_pending(struct inode *inode,
                                       u64 file_offset, u64 io_size);
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
-                            u64 start, u64 len);
+                            u64 start, u64 len, int nocow);
 int btrfs_add_ordered_sum(struct inode *inode,
                          struct btrfs_ordered_extent *entry,
                          struct btrfs_ordered_sum *sum);
@@ -143,5 +145,5 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
                                       pgoff_t start, pgoff_t end);
 int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
                           loff_t end, int sync_mode);
-int btrfs_wait_ordered_extents(struct btrfs_root *root);
+int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
 #endif
index ebf5362..9d3d08e 100644 (file)
@@ -438,6 +438,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
 
                                free_extent_buffer(root->commit_root);
                                root->commit_root = NULL;
+                               root->dirty_root = NULL;
 
                                spin_lock(&root->list_lock);
                                list_del_init(&dirty->root->dead_list);
@@ -461,6 +462,7 @@ static noinline int add_dirty_roots(struct btrfs_trans_handle *trans,
                               sizeof(struct btrfs_disk_key));
                        root->root_item.drop_level = 0;
                        root->commit_root = NULL;
+                       root->dirty_root = NULL;
                        root->root_key.offset = root->fs_info->generation;
                        btrfs_set_root_bytenr(&root->root_item,
                                              root->node->start);
@@ -762,7 +764,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
        }
 
        do {
+               int snap_pending = 0;
                joined = cur_trans->num_joined;
+               if (!list_empty(&trans->transaction->pending_snapshots))
+                       snap_pending = 1;
+
                WARN_ON(cur_trans != trans->transaction);
                prepare_to_wait(&cur_trans->writer_wait, &wait,
                                TASK_UNINTERRUPTIBLE);
@@ -774,6 +780,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
                mutex_unlock(&root->fs_info->trans_mutex);
 
+               if (snap_pending) {
+                       ret = btrfs_wait_ordered_extents(root, 1);
+                       BUG_ON(ret);
+               }
+
                schedule_timeout(timeout);
 
                mutex_lock(&root->fs_info->trans_mutex);