mm, truncate: do not check mapping for every page being truncated
author Mel Gorman <mgorman@techsingularity.net>
Thu, 16 Nov 2017 01:37:41 +0000 (17:37 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 16 Nov 2017 02:21:06 +0000 (18:21 -0800)
During truncation, the mapping has already been checked for shmem and
dax so it's known that workingset_update_node is required.

This patch avoids checking the mapping for each page being truncated.
In all other cases, a lookup helper is used to determine whether
workingset_update_node() needs to be called.  The one danger is that the
API is slightly harder to use, as calling workingset_update_node directly
without checking for dax or shmem mappings could lead to surprises.
However, the API rarely needs to be used and hopefully the comment is
enough to give people the hint.

sparsetruncate (tiny)
                              4.14.0-rc4             4.14.0-rc4
                             oneirq-v1r1        pickhelper-v1r1
Min          Time      141.00 (   0.00%)      140.00 (   0.71%)
1st-qrtle    Time      142.00 (   0.00%)      141.00 (   0.70%)
2nd-qrtle    Time      142.00 (   0.00%)      142.00 (   0.00%)
3rd-qrtle    Time      143.00 (   0.00%)      143.00 (   0.00%)
Max-90%      Time      144.00 (   0.00%)      144.00 (   0.00%)
Max-95%      Time      147.00 (   0.00%)      145.00 (   1.36%)
Max-99%      Time      195.00 (   0.00%)      191.00 (   2.05%)
Max          Time      230.00 (   0.00%)      205.00 (  10.87%)
Amean        Time      144.37 (   0.00%)      143.82 (   0.38%)
Stddev       Time       10.44 (   0.00%)        9.00 (  13.74%)
Coeff        Time        7.23 (   0.00%)        6.26 (  13.41%)
Best99%Amean Time      143.72 (   0.00%)      143.34 (   0.26%)
Best95%Amean Time      142.37 (   0.00%)      142.00 (   0.26%)
Best90%Amean Time      142.19 (   0.00%)      141.85 (   0.24%)
Best75%Amean Time      141.92 (   0.00%)      141.58 (   0.24%)
Best50%Amean Time      141.69 (   0.00%)      141.31 (   0.27%)
Best25%Amean Time      141.38 (   0.00%)      140.97 (   0.29%)

As you'd expect, the gain is marginal but it can be detected.  The
differences in bonnie are all within the noise, which is not surprising
given how small the impact is even on the microbenchmark.

radix_tree_update_node_t is a callback for some radix tree operations
that optionally passes in a private field.  The only user of the
callback is workingset_update_node() and, as it no longer requires a
mapping, the private field is removed.
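
Concretely, the callback type simply loses its second argument (taken
from the include/linux/radix-tree.h hunk below):

	/* Before: private data had to be threaded through the radix tree code */
	typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *);

	/* After: the callback works from the node alone */
	typedef void (*radix_tree_update_node_t)(struct radix_tree_node *);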

Link: http://lkml.kernel.org/r/20171018075952.10627-3-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
fs/dax.c
include/linux/radix-tree.h
include/linux/swap.h
lib/idr.c
lib/radix-tree.c
mm/filemap.c
mm/shmem.c
mm/truncate.c
mm/workingset.c
tools/testing/radix-tree/multiorder.c

index 9ec797424e4f9ff293a341417f7b53aa46652e1e..165fdfb6e508c675a34d9cc9e72b53fe4df61176 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -565,7 +565,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
                ret = __radix_tree_lookup(page_tree, index, &node, &slot);
                WARN_ON_ONCE(ret != entry);
                __radix_tree_replace(page_tree, node, slot,
-                                    new_entry, NULL, NULL);
+                                    new_entry, NULL);
                entry = new_entry;
        }
 
index 567ebb5eaab01f767f7e091a019a85ffd5d2fe73..0ca448c1cb42f32a09c8ad607ee15d1169fcb1ae 100644 (file)
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -301,18 +301,17 @@ void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index,
 void *radix_tree_lookup(const struct radix_tree_root *, unsigned long);
 void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *,
                                        unsigned long index);
-typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *);
+typedef void (*radix_tree_update_node_t)(struct radix_tree_node *);
 void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *,
                          void __rcu **slot, void *entry,
-                         radix_tree_update_node_t update_node, void *private);
+                         radix_tree_update_node_t update_node);
 void radix_tree_iter_replace(struct radix_tree_root *,
                const struct radix_tree_iter *, void __rcu **slot, void *entry);
 void radix_tree_replace_slot(struct radix_tree_root *,
                             void __rcu **slot, void *entry);
 void __radix_tree_delete_node(struct radix_tree_root *,
                              struct radix_tree_node *,
-                             radix_tree_update_node_t update_node,
-                             void *private);
+                             radix_tree_update_node_t update_node);
 void radix_tree_iter_delete(struct radix_tree_root *,
                        struct radix_tree_iter *iter, void __rcu **slot);
 void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *);
index 8b8a6f965785264295bf7b5470803f8fdd8159ae..454f042bcdd535a8acde067f9997de8ce9a6fa81 100644 (file)
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -298,7 +298,18 @@ struct vma_swap_readahead {
 void *workingset_eviction(struct address_space *mapping, struct page *page);
 bool workingset_refault(void *shadow);
 void workingset_activation(struct page *page);
-void workingset_update_node(struct radix_tree_node *node, void *private);
+
+/* Do not use directly, use workingset_lookup_update */
+void workingset_update_node(struct radix_tree_node *node);
+
+/* Returns workingset_update_node() if the mapping has shadow entries. */
+#define workingset_lookup_update(mapping)                              \
+({                                                                     \
+       radix_tree_update_node_t __helper = workingset_update_node;     \
+       if (dax_mapping(mapping) || shmem_mapping(mapping))             \
+               __helper = NULL;                                        \
+       __helper;                                                       \
+})
 
 /* linux/mm/page_alloc.c */
 extern unsigned long totalram_pages;
index edd9b2be1651fa99ae3a6274bda38c1bd7159a64..2593ce513a180a4f6de3515350592c9d4e4981d1 100644 (file)
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -171,7 +171,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id)
        if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
                return ERR_PTR(-ENOENT);
 
-       __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL, NULL);
+       __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL);
 
        return entry;
 }
index 8b1feca1230a08c4537036f517cfa4369b62dffa..c8d55565fafa6b80d597e69c2a6bd096d0857e3b 100644 (file)
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -677,8 +677,7 @@ out:
  *     @root           radix tree root
  */
 static inline bool radix_tree_shrink(struct radix_tree_root *root,
-                                    radix_tree_update_node_t update_node,
-                                    void *private)
+                                    radix_tree_update_node_t update_node)
 {
        bool shrunk = false;
 
@@ -739,7 +738,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
                if (!radix_tree_is_internal_node(child)) {
                        node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
                        if (update_node)
-                               update_node(node, private);
+                               update_node(node);
                }
 
                WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -752,7 +751,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
 
 static bool delete_node(struct radix_tree_root *root,
                        struct radix_tree_node *node,
-                       radix_tree_update_node_t update_node, void *private)
+                       radix_tree_update_node_t update_node)
 {
        bool deleted = false;
 
@@ -762,8 +761,8 @@ static bool delete_node(struct radix_tree_root *root,
                if (node->count) {
                        if (node_to_entry(node) ==
                                        rcu_dereference_raw(root->rnode))
-                               deleted |= radix_tree_shrink(root, update_node,
-                                                               private);
+                               deleted |= radix_tree_shrink(root,
+                                                               update_node);
                        return deleted;
                }
 
@@ -1173,7 +1172,6 @@ static int calculate_count(struct radix_tree_root *root,
  * @slot:              pointer to slot in @node
  * @item:              new item to store in the slot.
  * @update_node:       callback for changing leaf nodes
- * @private:           private data to pass to @update_node
  *
  * For use with __radix_tree_lookup().  Caller must hold tree write locked
  * across slot lookup and replacement.
@@ -1181,7 +1179,7 @@ static int calculate_count(struct radix_tree_root *root,
 void __radix_tree_replace(struct radix_tree_root *root,
                          struct radix_tree_node *node,
                          void __rcu **slot, void *item,
-                         radix_tree_update_node_t update_node, void *private)
+                         radix_tree_update_node_t update_node)
 {
        void *old = rcu_dereference_raw(*slot);
        int exceptional = !!radix_tree_exceptional_entry(item) -
@@ -1201,9 +1199,9 @@ void __radix_tree_replace(struct radix_tree_root *root,
                return;
 
        if (update_node)
-               update_node(node, private);
+               update_node(node);
 
-       delete_node(root, node, update_node, private);
+       delete_node(root, node, update_node);
 }
 
 /**
@@ -1225,7 +1223,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
 void radix_tree_replace_slot(struct radix_tree_root *root,
                             void __rcu **slot, void *item)
 {
-       __radix_tree_replace(root, NULL, slot, item, NULL, NULL);
+       __radix_tree_replace(root, NULL, slot, item, NULL);
 }
 EXPORT_SYMBOL(radix_tree_replace_slot);
 
@@ -1242,7 +1240,7 @@ void radix_tree_iter_replace(struct radix_tree_root *root,
                                const struct radix_tree_iter *iter,
                                void __rcu **slot, void *item)
 {
-       __radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
+       __radix_tree_replace(root, iter->node, slot, item, NULL);
 }
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
@@ -1972,7 +1970,6 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
  *     @root:          radix tree root
  *     @node:          node containing @index
  *     @update_node:   callback for changing leaf nodes
- *     @private:       private data to pass to @update_node
  *
  *     After clearing the slot at @index in @node from radix tree
  *     rooted at @root, call this function to attempt freeing the
@@ -1980,10 +1977,9 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
  */
 void __radix_tree_delete_node(struct radix_tree_root *root,
                              struct radix_tree_node *node,
-                             radix_tree_update_node_t update_node,
-                             void *private)
+                             radix_tree_update_node_t update_node)
 {
-       delete_node(root, node, update_node, private);
+       delete_node(root, node, update_node);
 }
 
 static bool __radix_tree_delete(struct radix_tree_root *root,
@@ -2001,7 +1997,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
                        node_tag_clear(root, node, tag, offset);
 
        replace_slot(slot, NULL, node, -1, exceptional);
-       return node && delete_node(root, node, NULL, NULL);
+       return node && delete_node(root, node, NULL);
 }
 
 /**
index a470dd8cd05b602fb4f0c5a43386531a061fd840..155370fc87f24e5da09e608bff75fd097bdbf42f 100644 (file)
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -35,6 +35,7 @@
 #include <linux/hugetlb.h>
 #include <linux/memcontrol.h>
 #include <linux/cleancache.h>
+#include <linux/shmem_fs.h>
 #include <linux/rmap.h>
 #include "internal.h"
 
@@ -134,7 +135,7 @@ static int page_cache_tree_insert(struct address_space *mapping,
                        *shadowp = p;
        }
        __radix_tree_replace(&mapping->page_tree, node, slot, page,
-                            workingset_update_node, mapping);
+                            workingset_lookup_update(mapping));
        mapping->nrpages++;
        return 0;
 }
@@ -162,7 +163,7 @@ static void page_cache_tree_delete(struct address_space *mapping,
 
                radix_tree_clear_tags(&mapping->page_tree, node, slot);
                __radix_tree_replace(&mapping->page_tree, node, slot, shadow,
-                                    workingset_update_node, mapping);
+                               workingset_lookup_update(mapping));
        }
 
        page->mapping = NULL;
@@ -359,7 +360,7 @@ page_cache_tree_delete_batch(struct address_space *mapping,
                }
                radix_tree_clear_tags(&mapping->page_tree, iter.node, slot);
                __radix_tree_replace(&mapping->page_tree, iter.node, slot, NULL,
-                                    workingset_update_node, mapping);
+                               workingset_lookup_update(mapping));
                total_pages++;
        }
        mapping->nrpages -= total_pages;
index 07a1d22807beb7cfb5f85c3343a5eacdc28fb523..a72f68aee6a4801b36b583f3fc6ecfcf5abee748 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -338,7 +338,7 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
        if (item != expected)
                return -ENOENT;
        __radix_tree_replace(&mapping->page_tree, node, pslot,
-                            replacement, NULL, NULL);
+                            replacement, NULL);
        return 0;
 }
 
index 4a39a3150ee22a4a085516438875a0bf731c8abc..02a0c0466c784d6d57c793643fff81bce10bbbda 100644 (file)
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -42,7 +42,7 @@ static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
        if (*slot != entry)
                goto unlock;
        __radix_tree_replace(&mapping->page_tree, node, slot, NULL,
-                            workingset_update_node, mapping);
+                            workingset_update_node);
        mapping->nrexceptional--;
 unlock:
        spin_unlock_irq(&mapping->tree_lock);
index b997c9de28f6c4074f1ebc1e2f480bbae8ad1df9..b7d616a3bbbe46e1d2036f33948d197a29853f48 100644 (file)
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -340,14 +340,8 @@ out:
 
 static struct list_lru shadow_nodes;
 
-void workingset_update_node(struct radix_tree_node *node, void *private)
+void workingset_update_node(struct radix_tree_node *node)
 {
-       struct address_space *mapping = private;
-
-       /* Only regular page cache has shadow entries */
-       if (dax_mapping(mapping) || shmem_mapping(mapping))
-               return;
-
        /*
         * Track non-empty nodes that contain only shadow entries;
         * unlink those that contain pages or are being freed.
@@ -475,7 +469,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
                goto out_invalid;
        inc_lruvec_page_state(virt_to_page(node), WORKINGSET_NODERECLAIM);
        __radix_tree_delete_node(&mapping->page_tree, node,
-                                workingset_update_node, mapping);
+                                workingset_lookup_update(mapping));
 
 out_invalid:
        spin_unlock(&mapping->tree_lock);
index 06c71178d07d98ab03bbf1575a3d98fa5299f7b1..59245b3d587c35c039c77e673bdd706319368167 100644 (file)
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -618,7 +618,7 @@ static void multiorder_account(void)
        __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
        __radix_tree_lookup(&tree, 1 << 5, &node, &slot);
        assert(node->count == node->exceptional * 2);
-       __radix_tree_replace(&tree, node, slot, NULL, NULL, NULL);
+       __radix_tree_replace(&tree, node, slot, NULL, NULL);
        assert(node->exceptional == 0);
 
        item_kill_tree(&tree);