Btrfs: fix very slow inode eviction and fs unmount

author Filipe David Borba Manana <fdmanana@gmail.com>

Tue, 19 Nov 2013 22:29:35 +0000 (22:29 +0000)

committer Chris Mason <clm@fb.com>

Tue, 28 Jan 2014 21:19:44 +0000 (13:19 -0800)
author Filipe David Borba Manana <fdmanana@gmail.com>
Tue, 19 Nov 2013 22:29:35 +0000 (22:29 +0000)
committer Chris Mason <clm@fb.com>
Tue, 28 Jan 2014 21:19:44 +0000 (13:19 -0800)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 5a5de36..e889779 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4488,6 +4488,62 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
         return err;
  }
  
+/*
+ * While truncating the inode pages during eviction, we get the VFS calling
+ * btrfs_invalidatepage() against each page of the inode. This is slow because
+ * the calls to btrfs_invalidatepage() result in a huge amount of calls to
+ * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting
+ * extent_state structures over and over, wasting lots of time.
+ *
+ * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all
+ * those expensive operations on a per page basis and do only the ordered io
+ * finishing, while we release here the extent_map and extent_state structures,
+ * without the excessive merging and splitting.
+ *
+ * The caller must only use this on an inode that has I_FREEING set in its
+ * i_state, which is asserted below.
+ */
+static void evict_inode_truncate_pages(struct inode *inode)
+{
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree;
+       struct rb_node *node;
+
+       ASSERT(inode->i_state & I_FREEING);
+       truncate_inode_pages(&inode->i_data, 0);
+
+       /* Remove and release every extent map still attached to the inode. */
+       write_lock(&map_tree->lock);
+       while (!RB_EMPTY_ROOT(&map_tree->map)) {
+               struct extent_map *em;
+
+               node = rb_first(&map_tree->map);
+               em = rb_entry(node, struct extent_map, rb_node);
+               remove_extent_mapping(map_tree, em);
+               free_extent_map(em);
+       }
+       write_unlock(&map_tree->lock);
+
+       /* Clear one extent_state range per iteration until the tree is empty. */
+       spin_lock(&io_tree->lock);
+       while (!RB_EMPTY_ROOT(&io_tree->state)) {
+               struct extent_state *state;
+               struct extent_state *cached_state = NULL;
+               u64 start;
+               u64 end;
+
+               node = rb_first(&io_tree->state);
+               state = rb_entry(node, struct extent_state, rb_node);
+               /*
+                * Snapshot the range while the tree lock is still held.
+                * lock_extent_bits() and clear_extent_bit() merge and split
+                * extent_state structures, so re-reading state->start and
+                * state->end after dropping the lock could make us clear a
+                * different range than the one we locked. Since the state is
+                * not dereferenced after the unlock, no extra reference on it
+                * is needed either.
+                */
+               start = state->start;
+               end = state->end;
+               spin_unlock(&io_tree->lock);
+
+               lock_extent_bits(io_tree, start, end, 0, &cached_state);
+               clear_extent_bit(io_tree, start, end,
+                                EXTENT_LOCKED | EXTENT_DIRTY |
+                                EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                                EXTENT_DEFRAG, 1, 1,
+                                &cached_state, GFP_NOFS);
+
+               spin_lock(&io_tree->lock);
+       }
+       spin_unlock(&io_tree->lock);
+}
+
  void btrfs_evict_inode(struct inode *inode)
  {
         struct btrfs_trans_handle *trans;
@@ -4498,7 +4554,8 @@ void btrfs_evict_inode(struct inode *inode)
  
         trace_btrfs_inode_evict(inode);
  
-       truncate_inode_pages(&inode->i_data, 0);
+       evict_inode_truncate_pages(inode);
+
         if (inode->i_nlink &&
             ((btrfs_root_refs(&root->root_item) != 0 &&
               root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) ||
@@ -7379,6 +7436,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
         struct extent_state *cached_state = NULL;
         u64 page_start = page_offset(page);
         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+       int inode_evicting = inode->i_state & I_FREEING;
  
         /*
          * we have the page locked, so new writeback can't start,
@@ -7394,17 +7452,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
                 btrfs_releasepage(page, GFP_NOFS);
                 return;
         }
-       lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
-       ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
+
+       if (!inode_evicting)
+               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+       ordered = btrfs_lookup_ordered_extent(inode, page_start);
         if (ordered) {
                 /*
                  * IO on this page will never be started, so we need
                  * to account for any ordered extents now
                  */
-               clear_extent_bit(tree, page_start, page_end,
-                                EXTENT_DIRTY | EXTENT_DELALLOC |
-                                EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
-                                EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS);
+               if (!inode_evicting)
+                       clear_extent_bit(tree, page_start, page_end,
+                                        EXTENT_DIRTY | EXTENT_DELALLOC |
+                                        EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+                                        EXTENT_DEFRAG, 1, 0, &cached_state,
+                                        GFP_NOFS);
                 /*
                  * whoever cleared the private bit is responsible
                  * for the finish_ordered_io
@@ -7428,14 +7490,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
                                 btrfs_finish_ordered_io(ordered);
                 }
                 btrfs_put_ordered_extent(ordered);
-               cached_state = NULL;
-               lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
+               if (!inode_evicting) {
+                       cached_state = NULL;
+                       lock_extent_bits(tree, page_start, page_end, 0,
+                                        &cached_state);
+               }
+       }
+
+       if (!inode_evicting) {
+               clear_extent_bit(tree, page_start, page_end,
+                                EXTENT_LOCKED | EXTENT_DIRTY |
+                                EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+                                EXTENT_DEFRAG, 1, 1,
+                                &cached_state, GFP_NOFS);
+
+               __btrfs_releasepage(page, GFP_NOFS);
         }
-       clear_extent_bit(tree, page_start, page_end,
-                EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
-                EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1,
-                &cached_state, GFP_NOFS);
-       __btrfs_releasepage(page, GFP_NOFS);
  
         ClearPageChecked(page);
         if (PagePrivate(page)) {
author	Filipe David Borba Manana <fdmanana@gmail.com>
	Tue, 19 Nov 2013 22:29:35 +0000 (22:29 +0000)
committer	Chris Mason <clm@fb.com>
	Tue, 28 Jan 2014 21:19:44 +0000 (13:19 -0800)