mm, slub: change percpu partial accounting from objects to pages

author Vlastimil Babka <vbabka@suse.cz>

Fri, 5 Nov 2021 20:35:17 +0000 (13:35 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 6 Nov 2021 20:30:32 +0000 (13:30 -0700)
author Vlastimil Babka <vbabka@suse.cz>
Fri, 5 Nov 2021 20:35:17 +0000 (13:35 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 6 Nov 2021 20:30:32 +0000 (13:30 -0700)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 7f8ee09c711f411cfd3198333d68f8995437b463..68ffa064b7a813beda4e1301472b7ebd81ed1596 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -124,10 +124,8 @@ struct page {
                                         struct page *next;
  #ifdef CONFIG_64BIT
                                         int pages;      /* Nr of pages left */
-                                       int pobjects;   /* Approximate count */
  #else
                                         short int pages;
-                                       short int pobjects;
  #endif
                                 };
                         };
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h

index 85499f0586b06c55d2a494e5888bda1e2d651b9d..0fa751b946fa0b93e7e011039fee4ad9e4db048a 100644 (file)
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -99,6 +99,8 @@ struct kmem_cache {
  #ifdef CONFIG_SLUB_CPU_PARTIAL
         /* Number of per cpu partial objects to keep around */
         unsigned int cpu_partial;
+       /* Number of per cpu partial pages to keep around */
+       unsigned int cpu_partial_pages;
  #endif
         struct kmem_cache_order_objects oo;
  
@@ -141,17 +143,6 @@ struct kmem_cache {
         struct kmem_cache_node *node[MAX_NUMNODES];
  };
  
-#ifdef CONFIG_SLUB_CPU_PARTIAL
-#define slub_cpu_partial(s)            ((s)->cpu_partial)
-#define slub_set_cpu_partial(s, n)             \
-({                                             \
-       slub_cpu_partial(s) = (n);              \
-})
-#else
-#define slub_cpu_partial(s)            (0)
-#define slub_set_cpu_partial(s, n)
-#endif /* CONFIG_SLUB_CPU_PARTIAL */
-
  #ifdef CONFIG_SYSFS
  #define SLAB_SUPPORTS_SYSFS
  void sysfs_slab_unlink(struct kmem_cache *);
diff --git a/mm/slub.c b/mm/slub.c

index b6a1790812f77871951097c76a62fdd45c4b4683..0df81ea24b9158a2f4cc6c1d9f6f579d68dc99ba 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -414,6 +414,29 @@ static inline unsigned int oo_objects(struct kmem_cache_order_objects x)
         return x.x & OO_MASK;
  }
  
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+static void slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
+{
+       unsigned int nr_pages;
+
+       s->cpu_partial = nr_objects;
+
+       /*
+        * We take the number of objects but actually limit the number of
+        * pages on the per cpu partial list, in order to limit excessive
+        * growth of the list. For simplicity we assume that the pages will
+        * be half-full.
+        */
+       nr_pages = DIV_ROUND_UP(nr_objects * 2, oo_objects(s->oo));
+       s->cpu_partial_pages = nr_pages;
+}
+#else
+static inline void
+slub_set_cpu_partial(struct kmem_cache *s, unsigned int nr_objects)
+{
+}
+#endif /* CONFIG_SLUB_CPU_PARTIAL */
+
  /*
   * Per slab locking using the pagelock
   */
@@ -2052,7 +2075,7 @@ static inline void remove_partial(struct kmem_cache_node *n,
   */
  static inline void *acquire_slab(struct kmem_cache *s,
                 struct kmem_cache_node *n, struct page *page,
-               int mode, int *objects)
+               int mode)
  {
         void *freelist;
         unsigned long counters;
@@ -2068,7 +2091,6 @@ static inline void *acquire_slab(struct kmem_cache *s,
         freelist = page->freelist;
         counters = page->counters;
         new.counters = counters;
-       *objects = new.objects - new.inuse;
         if (mode) {
                 new.inuse = page->objects;
                 new.freelist = NULL;
@@ -2106,9 +2128,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
  {
         struct page *page, *page2;
         void *object = NULL;
-       unsigned int available = 0;
         unsigned long flags;
-       int objects;
+       unsigned int partial_pages = 0;
  
         /*
          * Racy check. If we mistakenly see no partial slabs then we
@@ -2126,11 +2147,10 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
                 if (!pfmemalloc_match(page, gfpflags))
                         continue;
  
-               t = acquire_slab(s, n, page, object == NULL, &objects);
+               t = acquire_slab(s, n, page, object == NULL);
                 if (!t)
                         break;
  
-               available += objects;
                 if (!object) {
                         *ret_page = page;
                         stat(s, ALLOC_FROM_PARTIAL);
@@ -2138,10 +2158,15 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
                 } else {
                         put_cpu_partial(s, page, 0);
                         stat(s, CPU_PARTIAL_NODE);
+                       partial_pages++;
                 }
+#ifdef CONFIG_SLUB_CPU_PARTIAL
                 if (!kmem_cache_has_cpu_partial(s)
-                       || available > slub_cpu_partial(s) / 2)
+                       || partial_pages > s->cpu_partial_pages / 2)
                         break;
+#else
+               break;
+#endif
  
         }
         spin_unlock_irqrestore(&n->list_lock, flags);
@@ -2546,14 +2571,13 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
         struct page *page_to_unfreeze = NULL;
         unsigned long flags;
         int pages = 0;
-       int pobjects = 0;
  
         local_lock_irqsave(&s->cpu_slab->lock, flags);
  
         oldpage = this_cpu_read(s->cpu_slab->partial);
  
         if (oldpage) {
-               if (drain && oldpage->pobjects > slub_cpu_partial(s)) {
+               if (drain && oldpage->pages >= s->cpu_partial_pages) {
                         /*
                          * Partial array is full. Move the existing set to the
                          * per node partial list. Postpone the actual unfreezing
@@ -2562,16 +2586,13 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
                         page_to_unfreeze = oldpage;
                         oldpage = NULL;
                 } else {
-                       pobjects = oldpage->pobjects;
                         pages = oldpage->pages;
                 }
         }
  
         pages++;
-       pobjects += page->objects - page->inuse;
  
         page->pages = pages;
-       page->pobjects = pobjects;
         page->next = oldpage;
  
         this_cpu_write(s->cpu_slab->partial, page);
@@ -3991,6 +4012,8 @@ static void set_min_partial(struct kmem_cache *s, unsigned long min)
  static void set_cpu_partial(struct kmem_cache *s)
  {
  #ifdef CONFIG_SLUB_CPU_PARTIAL
+       unsigned int nr_objects;
+
         /*
          * cpu_partial determined the maximum number of objects kept in the
          * per cpu partial lists of a processor.
@@ -4000,24 +4023,22 @@ static void set_cpu_partial(struct kmem_cache *s)
          * filled up again with minimal effort. The slab will never hit the
          * per node partial lists and therefore no locking will be required.
          *
-        * This setting also determines
-        *
-        * A) The number of objects from per cpu partial slabs dumped to the
-        *    per node list when we reach the limit.
-        * B) The number of objects in cpu partial slabs to extract from the
-        *    per node list when we run out of per cpu objects. We only fetch
-        *    50% to keep some capacity around for frees.
+        * For backwards compatibility reasons, this is determined as a number
+        * of objects, even though we now limit the maximum number of pages;
+        * see slub_set_cpu_partial().
          */
         if (!kmem_cache_has_cpu_partial(s))
-               slub_set_cpu_partial(s, 0);
+               nr_objects = 0;
         else if (s->size >= PAGE_SIZE)
-               slub_set_cpu_partial(s, 2);
+               nr_objects = 2;
         else if (s->size >= 1024)
-               slub_set_cpu_partial(s, 6);
+               nr_objects = 6;
         else if (s->size >= 256)
-               slub_set_cpu_partial(s, 13);
+               nr_objects = 13;
         else
-               slub_set_cpu_partial(s, 30);
+               nr_objects = 30;
+
+       slub_set_cpu_partial(s, nr_objects);
  #endif
  }
  
@@ -5392,7 +5413,12 @@ SLAB_ATTR(min_partial);
  
  static ssize_t cpu_partial_show(struct kmem_cache *s, char *buf)
  {
-       return sysfs_emit(buf, "%u\n", slub_cpu_partial(s));
+       unsigned int nr_partial = 0;
+#ifdef CONFIG_SLUB_CPU_PARTIAL
+       nr_partial = s->cpu_partial;
+#endif
+
+       return sysfs_emit(buf, "%u\n", nr_partial);
  }
  
  static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
@@ -5463,12 +5489,12 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
  
                 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
  
-               if (page) {
+               if (page)
                         pages += page->pages;
-                       objects += page->pobjects;
-               }
         }
  
+       /* Approximate half-full pages, see slub_set_cpu_partial() */
+       objects = (pages * oo_objects(s->oo)) / 2;
         len += sysfs_emit_at(buf, len, "%d(%d)", objects, pages);
  
  #ifdef CONFIG_SMP
@@ -5476,9 +5502,12 @@ static ssize_t slabs_cpu_partial_show(struct kmem_cache *s, char *buf)
                 struct page *page;
  
                 page = slub_percpu_partial(per_cpu_ptr(s->cpu_slab, cpu));
-               if (page)
+               if (page) {
+                       pages = READ_ONCE(page->pages);
+                       objects = (pages * oo_objects(s->oo)) / 2;
                         len += sysfs_emit_at(buf, len, " C%d=%d(%d)",
-                                            cpu, page->pobjects, page->pages);
+                                            cpu, objects, pages);
+               }
         }
  #endif
         len += sysfs_emit_at(buf, len, "\n");
author	Vlastimil Babka <vbabka@suse.cz>
	Fri, 5 Nov 2021 20:35:17 +0000 (13:35 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 6 Nov 2021 20:30:32 +0000 (13:30 -0700)
include/linux/mm_types.h		patch \| blob \| history
include/linux/slub_def.h		patch \| blob \| history
mm/slub.c		patch \| blob \| history