Btrfs: reduce stalls during transaction commit

author Chris Mason <chris.mason@oracle.com>

Fri, 13 Mar 2009 00:12:45 +0000 (20:12 -0400)

committer Chris Mason <chris.mason@oracle.com>

Tue, 24 Mar 2009 20:14:26 +0000 (16:14 -0400)
author Chris Mason <chris.mason@oracle.com>
Fri, 13 Mar 2009 00:12:45 +0000 (20:12 -0400)
committer Chris Mason <chris.mason@oracle.com>
Tue, 24 Mar 2009 20:14:26 +0000 (16:14 -0400)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 3b8b6c21270132e2ce2ce438ca8dffda9f20c2b7..a421c32c6cfefe36ce0a1892e5462e2b8228f5b8 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3797,7 +3797,8 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
                         break;
                 if (wret < 0)
                         ret = wret;
-               if (trans->transaction->in_commit) {
+               if (trans->transaction->in_commit ||
+                   trans->transaction->delayed_refs.flushing) {
                         ret = -EAGAIN;
                         break;
                 }
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 7d4f948bc22a5b5c811b93b385ff36d5b49e8977..13a17477c4f4dacf91d03519a06cb4ccfd5a78ba 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1502,6 +1502,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
         struct btrfs_trans_handle *trans;
         struct btrfs_ordered_extent *ordered_extent;
         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct btrfs_path *path;
         int compressed = 0;
         int ret;
  
@@ -1509,6 +1510,23 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
         if (!ret)
                 return 0;
  
+       /*
+        * before we join the transaction, try to do some of our IO.
+        * This will limit the amount of IO that we have to do with
+        * the transaction running.  We're unlikely to need to do any
+        * IO if the file extents are new; the disk_i_size check
+        * covers the most common case.
+        */
+       if (start < BTRFS_I(inode)->disk_i_size) {
+               path = btrfs_alloc_path();
+               if (path) {
+                       ret = btrfs_lookup_file_extent(NULL, root, path,
+                                                      inode->i_ino,
+                                                      start, 0);
+                       btrfs_free_path(path);
+               }
+       }
+
         trans = btrfs_join_transaction(root, 1);
  
         ordered_extent = btrfs_lookup_ordered_extent(inode, start);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c

index 903edab3659a3ce46af880e611416205cf300e92..01c9620bb0016e0cfcc8f1e346e9750060dfe966 100644 (file)
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -192,6 +192,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
         h->alloc_exclude_nr = 0;
         h->alloc_exclude_start = 0;
         h->delayed_ref_updates = 0;
+
         root->fs_info->running_transaction->use_count++;
         mutex_unlock(&root->fs_info->trans_mutex);
         return h;
@@ -281,7 +282,6 @@ void btrfs_throttle(struct btrfs_root *root)
         if (!root->fs_info->open_ioctl_trans)
                 wait_current_trans(root);
         mutex_unlock(&root->fs_info->trans_mutex);
-
         throttle_on_drops(root);
  }
  
@@ -298,6 +298,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
                 if (cur &&
                     trans->transaction->delayed_refs.num_heads_ready > 64) {
                         trans->delayed_ref_updates = 0;
+
+                       /*
+                        * do a full flush if the transaction is trying
+                        * to close
+                        */
+                       if (trans->transaction->delayed_refs.flushing)
+                               cur = 0;
                         btrfs_run_delayed_refs(trans, root, cur);
                 } else {
                         break;
@@ -665,6 +672,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
         return 0;
  }
  
+/*
+ * when dropping snapshots, we generate a ton of delayed refs, and it makes
+ * sense not to join the transaction while it is trying to flush the current
+ * queue of delayed refs out.  Sleep on transaction_wait, with trans_mutex
+ * dropped, until no running transaction has delayed_refs.flushing set.
+ * Always returns 0.  This is used by the drop snapshot code only
+ */
+static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
+{
+       DEFINE_WAIT(wait);
+
+       mutex_lock(&info->trans_mutex);
+       while (info->running_transaction &&
+              info->running_transaction->delayed_refs.flushing) {
+               prepare_to_wait(&info->transaction_wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+               mutex_unlock(&info->trans_mutex);
+               schedule();
+               mutex_lock(&info->trans_mutex);
+               finish_wait(&info->transaction_wait, &wait);
+       }
+       mutex_unlock(&info->trans_mutex);
+       return 0;
+}
+
  /*
   * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
   * all of them
@@ -692,7 +724,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root,
                 atomic_inc(&root->fs_info->throttles);
  
                 while (1) {
+                       /*
+                        * we don't want to jump in and create a bunch of
+                        * delayed refs if the transaction is starting to close
+                        */
+                       wait_transaction_pre_flush(tree_root->fs_info);
                         trans = btrfs_start_transaction(tree_root, 1);
+
+                       /*
+                        * we've joined a transaction, make sure it isn't
+                        * closing right now
+                        */
+                       if (trans->transaction->delayed_refs.flushing) {
+                               btrfs_end_transaction(trans, tree_root);
+                               continue;
+                       }
+
                         mutex_lock(&root->fs_info->drop_mutex);
                         ret = btrfs_drop_snapshot(trans, dirty->root);
                         if (ret != -EAGAIN)
@@ -932,20 +979,20 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         ret = btrfs_run_delayed_refs(trans, root, 0);
         BUG_ON(ret);
  
+       cur_trans = trans->transaction;
         /*
          * set the flushing flag so procs in this transaction have to
          * start sending their work down.
          */
-       trans->transaction->delayed_refs.flushing = 1;
+       cur_trans->delayed_refs.flushing = 1;
  
         ret = btrfs_run_delayed_refs(trans, root, 0);
         BUG_ON(ret);
  
-       INIT_LIST_HEAD(&dirty_fs_roots);
         mutex_lock(&root->fs_info->trans_mutex);
-       if (trans->transaction->in_commit) {
-               cur_trans = trans->transaction;
-               trans->transaction->use_count++;
+       INIT_LIST_HEAD(&dirty_fs_roots);
+       if (cur_trans->in_commit) {
+               cur_trans->use_count++;
                 mutex_unlock(&root->fs_info->trans_mutex);
                 btrfs_end_transaction(trans, root);
  
@@ -968,7 +1015,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
  
         trans->transaction->in_commit = 1;
         trans->transaction->blocked = 1;
-       cur_trans = trans->transaction;
         if (cur_trans->list.prev != &root->fs_info->trans_list) {
                 prev_trans = list_entry(cur_trans->list.prev,
                                         struct btrfs_transaction, list);
@@ -1081,6 +1127,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         btrfs_copy_pinned(root, pinned_copy);
  
         trans->transaction->blocked = 0;
+
         wake_up(&root->fs_info->transaction_throttle);
         wake_up(&root->fs_info->transaction_wait);
  
@@ -1107,6 +1154,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
         mutex_lock(&root->fs_info->trans_mutex);
  
         cur_trans->commit_done = 1;
+
         root->fs_info->last_trans_committed = cur_trans->transid;
         wake_up(&cur_trans->commit_wait);
  
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h

index 94876709217f50bbbe018ab325c81ca4e6d5322b..94f5bde2b58d40144a5d5ddf3a11607cef4b0e82 100644 (file)
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -23,7 +23,12 @@
  
  struct btrfs_transaction {
         u64 transid;
+       /*
+        * total writers in this transaction, it must be zero before the
+        * transaction can end
+        */
         unsigned long num_writers;
+
         unsigned long num_joined;
         int in_commit;
         int use_count;
author	Chris Mason <chris.mason@oracle.com>
	Fri, 13 Mar 2009 00:12:45 +0000 (20:12 -0400)
committer	Chris Mason <chris.mason@oracle.com>
	Tue, 24 Mar 2009 20:14:26 +0000 (16:14 -0400)
fs/btrfs/extent-tree.c		patch \| blob \| history
fs/btrfs/inode.c		patch \| blob \| history
fs/btrfs/transaction.c		patch \| blob \| history
fs/btrfs/transaction.h		patch \| blob \| history