kselftests: cgroup: add kernel memory accounting tests
authorRoman Gushchin <guro@fb.com>
Fri, 7 Aug 2020 06:21:30 +0000 (23:21 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 7 Aug 2020 18:33:25 +0000 (11:33 -0700)
Add some tests to cover the kernel memory accounting functionality.  These
are covering some issues (and changes) we had recently.

1) A test which allocates a lot of negative dentries, checks memcg slab
   statistics, creates memory pressure by setting memory.high to some low
   value and checks that some number of slabs was reclaimed.

2) A test which covers side effects of memcg destruction: it creates
   and destroys a large number of sub-cgroups, each containing a
   multi-threaded workload which allocates and releases some kernel
   memory.  Then it checks that the charge and memory.stat do add up on
   the parent level.

3) A test which reads /proc/kpagecgroup and implicitly checks that it
   doesn't crash the system.

4) A test which spawns a large number of threads and checks that the
   kernel stacks accounting works as expected.

5) A test which checks that living charged slab objects are not
   preventing the memory cgroup from being released after being deleted by
   a user.

Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Link: http://lkml.kernel.org/r/20200623174037.3951353-19-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
tools/testing/selftests/cgroup/.gitignore
tools/testing/selftests/cgroup/Makefile
tools/testing/selftests/cgroup/test_kmem.c [new file with mode: 0644]

index aa6de65b083843a3d08619bcef05e9a1fb160930..84cfcabea83873efb539f356e1fe6c75d5be2860 100644 (file)
@@ -2,3 +2,4 @@
 test_memcontrol
 test_core
 test_freezer
+test_kmem
\ No newline at end of file
index 967f268fde74e7f61d741e58e2b257a365e25c59..f027d933595b33fab0f8606a2d9cbc0ebb5a7572 100644 (file)
@@ -6,11 +6,13 @@ all:
 TEST_FILES     := with_stress.sh
 TEST_PROGS     := test_stress.sh
 TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_kmem
 TEST_GEN_PROGS += test_core
 TEST_GEN_PROGS += test_freezer
 
 include ../lib.mk
 
 $(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
 $(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
 $(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
new file mode 100644 (file)
index 0000000..5224dae
--- /dev/null
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+
+/*
+ * Look up a number of paths that are not expected to exist.
+ *
+ * @cgroup: unused.
+ * @arg:    number of lookups to perform, passed as an integer cast to a
+ *          pointer.
+ *
+ * Every path is built from the lookup index and the pid of the calling
+ * process. The stat() results are deliberately ignored: only the lookups
+ * themselves matter (the tests use them to create negative dentries).
+ *
+ * Always returns 0.
+ */
+static int alloc_dcache(const char *cgroup, void *arg)
+{
+       const unsigned long nr_lookups = (unsigned long)arg;
+       unsigned long idx = 0;
+       char path[128];
+       struct stat sb;
+
+       while (idx < nr_lookups) {
+               snprintf(path, sizeof(path),
+                        "/something-non-existent-with-a-long-name-%64lu-%d",
+                        idx, getpid());
+               stat(path, &sb);
+               idx++;
+       }
+
+       return 0;
+}
+
+/*
+ * This test allocates 100000 negative dentries with long names.
+ * Then it checks that "slab" in memory.stat is at least 1M.
+ * Then it sets memory.high to 1M and checks that both "slab" and
+ * memory.current have dropped below 1/2 of the initial slab size.
+ *
+ * Returns KSFT_PASS on success and KSFT_FAIL otherwise.
+ */
+static int test_kmem_basic(const char *root)
+{
+       int ret = KSFT_FAIL;
+       char *cg = NULL;
+       long slab0, slab1, current;
+
+       cg = cg_name(root, "kmem_basic_test");
+       if (!cg)
+               goto cleanup;
+
+       if (cg_create(cg))
+               goto cleanup;
+
+       if (cg_run(cg, alloc_dcache, (void *)100000))
+               goto cleanup;
+
+       slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
+       if (slab0 < (1 << 20))
+               goto cleanup;
+
+       /*
+        * If the limit could not be written, no memory pressure was
+        * applied and the numbers read below would be meaningless:
+        * fail explicitly instead of relying on the final comparison.
+        */
+       if (cg_write(cg, "memory.high", "1M"))
+               goto cleanup;
+
+       slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
+       if (slab1 <= 0)
+               goto cleanup;
+
+       current = cg_read_long(cg, "memory.current");
+       if (current <= 0)
+               goto cleanup;
+
+       if (slab1 < slab0 / 2 && current < slab0 / 2)
+               ret = KSFT_PASS;
+cleanup:
+       cg_destroy(cg);
+       free(cg);
+
+       return ret;
+}
+
+/*
+ * Thread entry point used by alloc_kmem_smp(): performs 100 alloc_dcache()
+ * lookups. The thread argument is not used and the thread result is NULL.
+ */
+static void *alloc_kmem_fn(void *arg)
+{
+       void *nr_lookups = (void *)100;
+
+       alloc_dcache(NULL, nr_lookups);
+       return NULL;
+}
+
+/*
+ * Run alloc_kmem_fn() concurrently in 2 * get_nprocs() threads and wait
+ * for all of them to finish.
+ *
+ * @cgroup: unused.
+ * @arg:    unused.
+ *
+ * Every thread that was successfully started is joined before the thread
+ * array is released, even when creating or joining another thread failed.
+ *
+ * Returns 0 if all threads were created and joined, -1 otherwise.
+ */
+static int alloc_kmem_smp(const char *cgroup, void *arg)
+{
+       int nr_threads = 2 * get_nprocs();
+       int nr_started = 0;
+       pthread_t *tinfo;
+       int ret = 0;
+       int i;
+
+       tinfo = calloc(nr_threads, sizeof(*tinfo));
+       if (tinfo == NULL)
+               return -1;
+
+       for (i = 0; i < nr_threads; i++) {
+               if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
+                                  (void *)(unsigned long)i)) {
+                       ret = -1;
+                       break;
+               }
+               nr_started++;
+       }
+
+       /* Join only what was started; keep going after a failed join. */
+       for (i = 0; i < nr_started; i++) {
+               if (pthread_join(tinfo[i], NULL))
+                       ret = -1;
+       }
+
+       free(tinfo);
+       return ret;
+}
+
+/*
+ * Sequentially create @times child cgroups of @parent, run @fn inside each
+ * of them and destroy the child again.
+ *
+ * @parent: path of the parent cgroup.
+ * @fn:     function handed to cg_run() for every child.
+ * @arg:    opaque argument forwarded to @fn.
+ * @times:  number of children to create.
+ *
+ * The children are named via cg_name_indexed(parent, "child", i).
+ * Processing stops at the first failure.
+ *
+ * Returns 0 if every iteration succeeded, -1 otherwise.
+ */
+static int cg_run_in_subcgroups(const char *parent,
+                               int (*fn)(const char *cgroup, void *arg),
+                               void *arg, int times)
+{
+       char *child;
+       int failed;
+       int i;
+
+       for (i = 0; i < times; i++) {
+               child = cg_name_indexed(parent, "child", i);
+               if (!child)
+                       return -1;
+
+               /* Forward the caller's argument instead of dropping it. */
+               failed = cg_create(child) || cg_run(child, fn, arg);
+
+               cg_destroy(child);
+               free(child);
+
+               if (failed)
+                       return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * The test creates and destroys a large number of cgroups. In each cgroup it
+ * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
+ * threads. Then it checks the sanity of numbers on the parent level:
+ * the total size of the cgroups should be roughly equal to
+ * anon + file + slab + kernel_stack.
+ *
+ * Returns KSFT_PASS if memory.current and the sum of the memory.stat
+ * counters differ by less than the tolerated error, KSFT_FAIL otherwise
+ * (the individual counters are printed in the mismatch case).
+ */
+static int test_kmem_memcg_deletion(const char *root)
+{
+       long current, slab, anon, file, kernel_stack, sum;
+       long max_error;
+       int ret = KSFT_FAIL;
+       char *parent;
+
+       parent = cg_name(root, "kmem_memcg_deletion_test");
+       if (!parent)
+               goto cleanup;
+
+       if (cg_create(parent))
+               goto cleanup;
+
+       if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+               goto cleanup;
+
+       if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
+               goto cleanup;
+
+       current = cg_read_long(parent, "memory.current");
+       slab = cg_read_key_long(parent, "memory.stat", "slab ");
+       anon = cg_read_key_long(parent, "memory.stat", "anon ");
+       file = cg_read_key_long(parent, "memory.stat", "file ");
+       kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
+       if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
+           kernel_stack < 0)
+               goto cleanup;
+
+       sum = slab + anon + file + kernel_stack;
+
+       /* The tolerated discrepancy scales with the number of CPUs. */
+       max_error = 4096L * 32 * 2 * get_nprocs();
+
+       /*
+        * The operands are long: abs() would truncate the difference to
+        * int before comparing, so labs() has to be used here.
+        */
+       if (labs(sum - current) < max_error) {
+               ret = KSFT_PASS;
+       } else {
+               printf("memory.current = %ld\n", current);
+               printf("slab + anon + file + kernel_stack = %ld\n", sum);
+               printf("slab = %ld\n", slab);
+               printf("anon = %ld\n", anon);
+               printf("file = %ld\n", file);
+               printf("kernel_stack = %ld\n", kernel_stack);
+       }
+
+cleanup:
+       cg_destroy(parent);
+       free(parent);
+
+       return ret;
+}
+
+/*
+ * Read /proc/kpagecgroup from start to end, discarding the data.
+ *
+ * The test passes when the file can be opened and read() eventually
+ * reports end of file (and, implicitly, the kernel survived the reads).
+ * A failed open() or a read() error yields KSFT_FAIL.
+ */
+static int test_kmem_proc_kpagecgroup(const char *root)
+{
+       unsigned long chunk[128];
+       ssize_t nread;
+       int kpage_fd;
+
+       kpage_fd = open("/proc/kpagecgroup", O_RDONLY);
+       if (kpage_fd < 0)
+               return KSFT_FAIL;
+
+       for (;;) {
+               nread = read(kpage_fd, chunk, sizeof(chunk));
+               if (nread <= 0)
+                       break;
+       }
+
+       close(kpage_fd);
+
+       /* 0 means end of file was reached, a negative value is an error. */
+       return nread == 0 ? KSFT_PASS : KSFT_FAIL;
+}
+
+/*
+ * Thread entry point used by spawn_1000_threads(): keeps the thread around
+ * by sleeping for 100 seconds. The argument is ignored; returns NULL.
+ */
+static void *pthread_wait_fn(void *arg)
+{
+       const unsigned int lifetime_sec = 100;
+
+       sleep(lifetime_sec);
+       return NULL;
+}
+
+/*
+ * Start 1000 threads running pthread_wait_fn() and, while they exist,
+ * check the "kernel_stack" counter in @cgroup's memory.stat.
+ *
+ * @cgroup: cgroup whose memory.stat is read.
+ * @arg:    unused.
+ *
+ * The threads are never joined here.
+ *
+ * Returns 0 if kernel_stack is at least 4096 * 1000, -1 if it is smaller
+ * or if the allocation or any thread creation failed.
+ */
+static int spawn_1000_threads(const char *cgroup, void *arg)
+{
+       const int count = 1000;
+       pthread_t *threads;
+       unsigned long n;
+       long kstack;
+       int rc;
+
+       threads = calloc(count, sizeof(pthread_t));
+       if (!threads)
+               return -1;
+
+       for (n = 0; n < count; n++) {
+               if (pthread_create(threads + n, NULL, pthread_wait_fn,
+                                  (void *)n) != 0) {
+                       free(threads);
+                       return -1;
+               }
+       }
+
+       kstack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
+       rc = (kstack < 4096 * 1000) ? -1 : 0;
+
+       free(threads);
+       return rc;
+}
+
+/*
+ * Runs spawn_1000_threads() inside a dedicated cgroup: that helper starts
+ * 1000 threads and verifies that memory.stat's kernel_stack is at least
+ * 1000 pages (of 4096 bytes) large.
+ *
+ * Returns KSFT_PASS if the helper succeeded, KSFT_FAIL otherwise.
+ */
+static int test_kmem_kernel_stacks(const char *root)
+{
+       int result = KSFT_FAIL;
+       char *cgrp;
+
+       cgrp = cg_name(root, "kmem_kernel_stacks_test");
+
+       /* Each step only runs if all the previous ones succeeded. */
+       if (cgrp && !cg_create(cgrp) &&
+           !cg_run(cgrp, spawn_1000_threads, NULL))
+               result = KSFT_PASS;
+
+       cg_destroy(cgrp);
+       free(cgrp);
+
+       return result;
+}
+
+/*
+ * This test sequentially creates 30 child cgroups, runs alloc_dcache() in
+ * each of them, and deletes them. Then it checks that the number of dying
+ * cgroups on the parent level ("nr_dying_descendants" in cgroup.stat)
+ * drops to 0, polling up to 5 times with a one second pause in between.
+ *
+ * Returns KSFT_PASS once the counter reads 0, KSFT_FAIL otherwise.
+ */
+static int test_kmem_dead_cgroups(const char *root)
+{
+       int result = KSFT_FAIL;
+       int attempts_left;
+       long nr_dying;
+       char *parent;
+
+       parent = cg_name(root, "kmem_dead_cgroups_test");
+       if (!parent ||
+           cg_create(parent) ||
+           cg_write(parent, "cgroup.subtree_control", "+memory") ||
+           cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
+               goto cleanup;
+
+       for (attempts_left = 5; attempts_left > 0; attempts_left--) {
+               nr_dying = cg_read_key_long(parent, "cgroup.stat",
+                                           "nr_dying_descendants ");
+               if (nr_dying == 0) {
+                       result = KSFT_PASS;
+                       break;
+               }
+               /*
+                * Dead cgroups may take a while to be reclaimed:
+                * pause before looking again.
+                */
+               sleep(1);
+       }
+
+cleanup:
+       cg_destroy(parent);
+       free(parent);
+
+       return result;
+}
+
+#define T(x) { x, #x }
+struct kmem_test {
+       int (*fn)(const char *root);
+       const char *name;
+} tests[] = {
+       T(test_kmem_basic),
+       T(test_kmem_memcg_deletion),
+       T(test_kmem_proc_kpagecgroup),
+       T(test_kmem_kernel_stacks),
+       T(test_kmem_dead_cgroups),
+};
+#undef T
+
+/*
+ * Entry point: locates the unified cgroup hierarchy, makes sure the memory
+ * controller can be used, then runs every entry of tests[] against the
+ * root and reports one result line per test.
+ *
+ * Returns EXIT_SUCCESS unless at least one test reported something other
+ * than KSFT_PASS or KSFT_SKIP, in which case EXIT_FAILURE is returned.
+ */
+int main(int argc, char **argv)
+{
+       char root[PATH_MAX];
+       int status = EXIT_SUCCESS;
+       int idx;
+
+       if (cg_find_unified_root(root, sizeof(root)))
+               ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+       /*
+        * The memory controller has to be listed in cgroup.controllers;
+        * a non-zero result from the lookup is treated as "not available".
+        */
+       if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+               ksft_exit_skip("memory controller isn't available\n");
+
+       /* Enable the controller for children if that is not the case yet. */
+       if (cg_read_strstr(root, "cgroup.subtree_control", "memory") &&
+           cg_write(root, "cgroup.subtree_control", "+memory"))
+               ksft_exit_skip("Failed to set memory controller\n");
+
+       for (idx = 0; idx < ARRAY_SIZE(tests); idx++) {
+               const char *name = tests[idx].name;
+               int outcome = tests[idx].fn(root);
+
+               if (outcome == KSFT_PASS) {
+                       ksft_test_result_pass("%s\n", name);
+               } else if (outcome == KSFT_SKIP) {
+                       ksft_test_result_skip("%s\n", name);
+               } else {
+                       status = EXIT_FAILURE;
+                       ksft_test_result_fail("%s\n", name);
+               }
+       }
+
+       return status;
+}