mm: memcg/slab: fix memory leak at non-root kmem_cache destroy
authorMuchun Song <songmuchun@bytedance.com>
Fri, 24 Jul 2020 04:15:27 +0000 (21:15 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 24 Jul 2020 19:42:41 +0000 (12:42 -0700)
If the kmem_cache refcount is greater than one, we should not mark the
root kmem_cache as dying.  If we mark the root kmem_cache dying
incorrectly, the non-root kmem_cache can never be destroyed.  It
resulted in memory leak when memcg was destroyed.  We can use the
following steps to reproduce.

  1) Use kmem_cache_create() to create a new kmem_cache named A.
  2) Coincidentally, the kmem_cache A is an alias for kmem_cache B,
     so the refcount of B is just increased.
  3) Use kmem_cache_destroy() to destroy the kmem_cache A, just
     decrease the B's refcount but mark the B as dying.
  4) Create a new memory cgroup and alloc memory from the kmem_cache
     B. It leads to create a non-root kmem_cache for allocating memory.
  5) When destroy the memory cgroup created in the step 4), the
     non-root kmem_cache can never be destroyed.

If we repeat steps 4) and 5), this will cause a lot of memory leak.  So
only when refcount reach zero, we mark the root kmem_cache as dying.

Fixes: 92ee383f6daa ("mm: fix race between kmem_cache destroy, create and deactivate")
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Roman Gushchin <guro@fb.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: <stable@vger.kernel.org>
Link: http://lkml.kernel.org/r/20200716165103.83462-1-songmuchun@bytedance.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
mm/slab_common.c

index 37d48a5..fe8b684 100644 (file)
@@ -326,6 +326,14 @@ int slab_unmergeable(struct kmem_cache *s)
        if (s->refcount < 0)
                return 1;
 
+#ifdef CONFIG_MEMCG_KMEM
+       /*
+        * Skip the dying kmem_cache.
+        */
+       if (s->memcg_params.dying)
+               return 1;
+#endif
+
        return 0;
 }
 
@@ -886,12 +894,15 @@ static int shutdown_memcg_caches(struct kmem_cache *s)
        return 0;
 }
 
-static void flush_memcg_workqueue(struct kmem_cache *s)
+static void memcg_set_kmem_cache_dying(struct kmem_cache *s)
 {
        spin_lock_irq(&memcg_kmem_wq_lock);
        s->memcg_params.dying = true;
        spin_unlock_irq(&memcg_kmem_wq_lock);
+}
 
+static void flush_memcg_workqueue(struct kmem_cache *s)
+{
        /*
         * SLAB and SLUB deactivate the kmem_caches through call_rcu. Make
         * sure all registered rcu callbacks have been invoked.
@@ -923,10 +934,6 @@ static inline int shutdown_memcg_caches(struct kmem_cache *s)
 {
        return 0;
 }
-
-static inline void flush_memcg_workqueue(struct kmem_cache *s)
-{
-}
 #endif /* CONFIG_MEMCG_KMEM */
 
 void slab_kmem_cache_release(struct kmem_cache *s)
@@ -944,8 +951,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
        if (unlikely(!s))
                return;
 
-       flush_memcg_workqueue(s);
-
        get_online_cpus();
        get_online_mems();
 
@@ -955,6 +960,22 @@ void kmem_cache_destroy(struct kmem_cache *s)
        if (s->refcount)
                goto out_unlock;
 
+#ifdef CONFIG_MEMCG_KMEM
+       memcg_set_kmem_cache_dying(s);
+
+       mutex_unlock(&slab_mutex);
+
+       put_online_mems();
+       put_online_cpus();
+
+       flush_memcg_workqueue(s);
+
+       get_online_cpus();
+       get_online_mems();
+
+       mutex_lock(&slab_mutex);
+#endif
+
        err = shutdown_memcg_caches(s);
        if (!err)
                err = shutdown_cache(s);