1 // SPDX-License-Identifier: GPL-2.0
13 #include "../kselftest.h"
14 #include "../../../../include/vdso/time64.h"
/*
 * Root directory of the KSM sysfs interface. KSM_FP() builds a full path by
 * string-literal concatenation, so its argument must be a string literal.
 */
17 #define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
18 #define KSM_FP(s) (KSM_SYSFS_PATH s)
/* Default option values; print_help() reports them and main() starts from them. */
19 #define KSM_SCAN_LIMIT_SEC_DEFAULT 120
20 #define KSM_PAGE_COUNT_DEFAULT 10l
21 #define KSM_PROT_STR_DEFAULT "rw"
22 #define KSM_USE_ZERO_PAGES_DEFAULT false
23 #define KSM_MERGE_ACROSS_NODES_DEFAULT true
/* Number of bytes in one MiB; the timing tests divide sizes by it when reporting. */
24 #define MB (1ul << 20)
/*
 * Saved values of the KSM sysfs tunables, one member per file name.
 * ksm_save_def() fills these in and ksm_restore() writes them back.
 * NOTE(review): those functions also use a member named 'run'; its
 * declaration is not shown in this excerpt.
 */
27 unsigned long max_page_sharing;
28 unsigned long merge_across_nodes;
29 unsigned long pages_to_scan;
31 unsigned long sleep_millisecs;
32 unsigned long stable_node_chains_prune_millisecs;
33 unsigned long use_zero_pages;
/*
 * Test selectors: main() stores one of these in test_name while parsing the
 * command line. Only part of the list is shown in this excerpt.
 */
39 CHECK_KSM_ZERO_PAGE_MERGE,
42 KSM_MERGE_TIME_HUGE_PAGES,
/*
 * Write @val, formatted as "%lu", to the sysfs file at @file_path.
 *
 * The file is opened in "w" mode and a negative fprintf() result is treated
 * as a failure condition. The failure branches, any cleanup and the return
 * statements are not shown in this excerpt (its embedded numbering skips
 * lines); callers in this file treat a non-zero result as an error.
 */
46 static int ksm_write_sysfs(const char *file_path, unsigned long val)
48 FILE *f = fopen(file_path, "w");
/* Diagnostic naming the path involved; goes to stderr. */
51 fprintf(stderr, "f %s\n", file_path);
55 if (fprintf(f, "%lu", val) < 0) {
/*
 * Read one unsigned long from the sysfs file at @file_path into *@val.
 *
 * The file is opened in "r" mode and parsed with fscanf("%lu"); anything
 * other than exactly one converted item is treated as a failure condition.
 * The failure branches, any cleanup and the return statements are not shown
 * in this excerpt; callers in this file treat a non-zero result as an error.
 */
65 static int ksm_read_sysfs(const char *file_path, unsigned long *val)
67 FILE *f = fopen(file_path, "r");
/* Diagnostic naming the path involved; goes to stderr. */
70 fprintf(stderr, "f %s\n", file_path);
74 if (fscanf(f, "%lu", val) != 1) {
/*
 * Translate a protection string such as "rw" into a protection value.
 *
 * Each of the characters 'r', 'w' and 'x' is looked up with strchr(), so
 * their order and repetition do not matter. The statement executed for each
 * match is not shown in this excerpt; presumably it ORs in PROT_READ,
 * PROT_WRITE or PROT_EXEC respectively -- confirm against the full file.
 */
84 static int str_to_prot(char *prot_str)
88 if ((strchr(prot_str, 'r')) != NULL)
90 if ((strchr(prot_str, 'w')) != NULL)
92 if ((strchr(prot_str, 'x')) != NULL)
/*
 * Print the command-line usage text to stdout: the synopsis, the list of
 * test types and the tunable options with their compiled-in defaults.
 * Whatever follows the last printf() is not shown in this excerpt.
 */
98 static void print_help(void)
100 printf("usage: ksm_tests [-h] <test type> [-a prot] [-p page_count] [-l timeout]\n"
101 "[-z use_zero_pages] [-m merge_across_nodes] [-s size]\n");
/* One entry per test-selecting flag. */
103 printf("Supported <test type>:\n"
104 " -M (page merging)\n"
105 " -Z (zero pages merging)\n"
106 " -N (merging of pages in different NUMA nodes)\n"
107 " -U (page unmerging)\n"
108 " -P evaluate merging time and speed.\n"
109 " For this test, the size of duplicated memory area (in MiB)\n"
110 " must be provided using -s option\n"
111 " -H evaluate merging time and speed of area allocated mostly with huge pages\n"
112 " For this test, the size of duplicated memory area (in MiB)\n"
113 " must be provided using -s option\n"
114 " -C evaluate the time required to break COW of merged pages.\n\n");
/* Options that take a value; each default comes from the matching *_DEFAULT macro. */
116 printf(" -a: specify the access protections of pages.\n"
117 " <prot> must be of the form [rwx].\n"
118 " Default: %s\n", KSM_PROT_STR_DEFAULT);
119 printf(" -p: specify the number of pages to test.\n"
120 " Default: %ld\n", KSM_PAGE_COUNT_DEFAULT);
121 printf(" -l: limit the maximum running time (in seconds) for a test.\n"
122 " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT);
123 printf(" -z: change use_zero_pages tunable\n"
124 " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT);
125 printf(" -m: change merge_across_nodes tunable\n"
126 " Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT);
127 printf(" -s: the size of duplicated memory area (in MiB)\n");
/*
 * Map @map_size bytes, fill every byte with @data, then apply @prot.
 *
 * @ptr and @mapping are passed to mmap() as the address hint and the flags.
 * The mapping is created with PROT_WRITE so that memset() can fill it no
 * matter which protection was requested; mprotect() switches it to @prot
 * afterwards, and the mapping is released again inside the mprotect()
 * failure branch.
 *
 * NOTE(review): the check on mmap()'s result and the return statements are
 * not shown in this excerpt. mmap() reports failure with MAP_FAILED rather
 * than NULL -- confirm that the elided check tests for MAP_FAILED.
 */
132 static void *allocate_memory(void *ptr, int prot, int mapping, char data, size_t map_size)
134 void *map_ptr = mmap(ptr, map_size, PROT_WRITE, mapping, -1, 0);
140 memset(map_ptr, data, map_size);
141 if (mprotect(map_ptr, map_size, prot)) {
143 munmap(map_ptr, map_size);
/*
 * Wait until the KSM "full_scans" counter has advanced by @scan_count.
 *
 * The counter is sampled once as a baseline and then re-read in a polling
 * loop with no sleep visible between reads. Every iteration also reads
 * CLOCK_MONOTONIC_RAW and compares the time elapsed since @start_time with
 * @timeout (seconds). The return statements are not shown in this excerpt;
 * callers treat a non-zero result as failure.
 */
150 static int ksm_do_scan(int scan_count, struct timespec start_time, int timeout)
152 struct timespec cur_time;
153 unsigned long cur_scan, init_scan;
155 if (ksm_read_sysfs(KSM_FP("full_scans"), &init_scan))
157 cur_scan = init_scan;
159 while (cur_scan < init_scan + scan_count) {
160 if (ksm_read_sysfs(KSM_FP("full_scans"), &cur_scan))
162 if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time)) {
163 perror("clock_gettime");
/* Only tv_sec is compared, so the limit is enforced with one-second granularity. */
166 if ((cur_time.tv_sec - start_time.tv_sec) > timeout) {
167 printf("Scan time limit exceeded\n");
/*
 * Ask KSM to merge the range [@addr, @addr + @size) and wait for it to scan.
 *
 * The range is marked MADV_MERGEABLE, 1 is written to the KSM "run" file,
 * and ksm_do_scan() then waits for two full scans under the @start_time /
 * @timeout limit. The failure branches and return statements are not shown
 * in this excerpt; callers treat a non-zero result as failure.
 */
175 static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time, int timeout)
177 if (madvise(addr, size, MADV_MERGEABLE)) {
181 if (ksm_write_sysfs(KSM_FP("run"), 1))
184 /* Since merging occurs only after 2 scans, make sure to get at least 2 full scans */
185 if (ksm_do_scan(2, start_time, timeout))
/*
 * Compare KSM's sharing counters with what @dupl_page_count identical pages
 * are expected to produce.
 *
 * pages_shared, pages_sharing and max_page_sharing are read from sysfs and
 * two expectations are evaluated:
 *   - when @dupl_page_count modulo max_page_sharing is 0 or 1:
 *       pages_shared  == count / max_page_sharing and
 *       pages_sharing == pages_shared * (max_page_sharing - 1);
 *   - a second pair (presumably the else branch; the connecting line is not
 *     shown in this excerpt):
 *       pages_shared  == count / max_page_sharing + 1 and
 *       pages_sharing == count - pages_shared.
 * @dupl_page_count is a long while the counters are unsigned long, so the
 * arithmetic is carried out in unsigned long. The return statements are not
 * shown in this excerpt.
 */
191 static bool assert_ksm_pages_count(long dupl_page_count)
193 unsigned long max_page_sharing, pages_sharing, pages_shared;
195 if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) ||
196 ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) ||
197 ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing))
201 * Since there must be at least 2 pages for merging and 1 page can be
202 * shared with the limited number of pages (max_page_sharing), sometimes
203 * there are 'leftover' pages that cannot be merged. For example, if there
204 * are 11 pages and max_page_sharing = 10, then only 10 pages will be
205 * merged and the 11th page won't be affected. As a result, when the number
206 * of duplicate pages is divided by max_page_sharing and the remainder is 1,
207 * pages_shared and pages_sharing values will be equal between dupl_page_count
208 * and dupl_page_count - 1.
210 if (dupl_page_count % max_page_sharing == 1 || dupl_page_count % max_page_sharing == 0) {
211 if (pages_shared == dupl_page_count / max_page_sharing &&
212 pages_sharing == pages_shared * (max_page_sharing - 1))
215 if (pages_shared == (dupl_page_count / max_page_sharing + 1) &&
216 pages_sharing == dupl_page_count - pages_shared)
223 static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
225 if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) ||
226 numa_available() ? 0 :
227 ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
228 ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) ||
229 ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) ||
230 ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) ||
231 ksm_read_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
232 &ksm_sysfs->stable_node_chains_prune_millisecs) ||
233 ksm_read_sysfs(KSM_FP("use_zero_pages"), &ksm_sysfs->use_zero_pages))
239 static int ksm_restore(struct ksm_sysfs *ksm_sysfs)
241 if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) ||
242 numa_available() ? 0 :
243 ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
244 ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) ||
245 ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) ||
246 ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) ||
247 ksm_write_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
248 ksm_sysfs->stable_node_chains_prune_millisecs) ||
249 ksm_write_sysfs(KSM_FP("use_zero_pages"), ksm_sysfs->use_zero_pages))
/*
 * Merge test: map @page_count pages of @page_size bytes filled with '*',
 * let KSM merge them and compare the sharing counters with @page_count.
 *
 * @mapping and @prot are forwarded to allocate_memory(). @timeout is the
 * scan limit in seconds, measured from the clock reading taken at the start
 * of the function. The mapping is released with munmap() on both visible
 * exit paths. The return statements are not shown in this excerpt.
 */
255 static int check_ksm_merge(int mapping, int prot, long page_count, int timeout, size_t page_size)
258 struct timespec start_time;
260 if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
261 perror("clock_gettime");
265 /* fill pages with the same data and merge them */
266 map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
270 if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
273 /* verify that the right number of pages are merged */
274 if (assert_ksm_pages_count(page_count)) {
276 munmap(map_ptr, page_size * page_count);
282 munmap(map_ptr, page_size * page_count);
/*
 * Unmerge test: merge identical pages, modify them, and check that KSM no
 * longer reports any shared pages.
 *
 * page_count is a local whose declaration is not shown in this excerpt; the
 * comment below speaks of 2 pages. After merging, the first byte of the
 * first and of the second page is overwritten, one more full scan is awaited
 * and the counters are compared with 0. @timeout (seconds) is measured from
 * the clock reading taken at the start and covers both waits. The return
 * statements are not shown in this excerpt.
 */
286 static int check_ksm_unmerge(int mapping, int prot, int timeout, size_t page_size)
289 struct timespec start_time;
292 if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
293 perror("clock_gettime");
297 /* fill pages with the same data and merge them */
298 map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
302 if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
305 /* change 1 byte in each of the 2 pages -- KSM must automatically unmerge them */
306 memset(map_ptr, '-', 1);
307 memset(map_ptr + page_size, '+', 1);
309 /* get at least 1 scan, so KSM can detect that the pages were modified */
310 if (ksm_do_scan(1, start_time, timeout))
313 /* check that unmerging was successful and 0 pages are currently merged */
314 if (assert_ksm_pages_count(0)) {
316 munmap(map_ptr, page_size * page_count);
322 munmap(map_ptr, page_size * page_count);
/*
 * Zero-page test: check how KSM treats zero-filled pages for a given
 * setting of the use_zero_pages tunable.
 *
 * @use_zero_pages is written to the tunable, @page_count pages of
 * @page_size bytes are filled with zero bytes and offered for merging, and
 * the counters are then compared with 0 when @use_zero_pages is set or with
 * @page_count when it is not. The branch bodies and return statements are
 * not shown in this excerpt.
 */
326 static int check_ksm_zero_page_merge(int mapping, int prot, long page_count, int timeout,
327 bool use_zero_pages, size_t page_size)
330 struct timespec start_time;
332 if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
333 perror("clock_gettime");
337 if (ksm_write_sysfs(KSM_FP("use_zero_pages"), use_zero_pages))
340 /* fill pages with zero and try to merge them */
341 map_ptr = allocate_memory(NULL, prot, mapping, 0, page_size * page_count);
345 if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
349 * verify that the right number of pages are merged:
350 * 1) if use_zero_pages is set to 1, empty pages are merged
351 * with the kernel zero page instead of with each other;
352 * 2) if use_zero_pages is set to 0, empty pages are not treated specially
353 * and merged as usual.
355 if (use_zero_pages && !assert_ksm_pages_count(0))
357 else if (!use_zero_pages && !assert_ksm_pages_count(page_count))
361 munmap(map_ptr, page_size * page_count);
366 munmap(map_ptr, page_size * page_count);
/*
 * Look at the NUMA node numbers that follow @node, wrapping around.
 *
 * Candidates run from @node + 1 onwards and are reduced modulo
 * (numa_max_node() + 1); numa_node_size() is queried for each one. The test
 * applied to node_size and the return statements are not shown in this
 * excerpt; presumably the first node that has memory is returned -- confirm.
 */
370 static int get_next_mem_node(int node)
375 int i, max_node = numa_max_node();
377 for (i = node + 1; i <= max_node + node; i++) {
378 mem_node = i % (max_node + 1);
379 node_size = numa_node_size(mem_node, NULL);
/*
 * Run the get_next_mem_node() search starting after the highest node number,
 * so that its modulo wrap-around makes node 0 the first candidate examined.
 */
386 static int get_first_mem_node(void)
388 return get_next_mem_node(numa_max_node());
/*
 * NUMA test: place one page on each of two different NUMA nodes, fill both
 * with the same data and check whether KSM merges them according to
 * @merge_across_nodes.
 *
 * The test needs numa_available() to succeed and more than one configured
 * node. @merge_across_nodes is written to the tunable of the same name. The
 * rest of the parameter list, the locals first_node and page_count, the
 * branch bodies and the return statements are not shown in this excerpt.
 */
391 static int check_ksm_numa_merge(int mapping, int prot, int timeout, bool merge_across_nodes,
394 void *numa1_map_ptr, *numa2_map_ptr;
395 struct timespec start_time;
399 if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
400 perror("clock_gettime");
/*
 * NOTE(review): perror() appends the text for the current errno, which may
 * be unrelated to numa_available() failing -- consider a plain message.
 */
404 if (numa_available() < 0) {
405 perror("NUMA support not enabled");
408 if (numa_num_configured_nodes() <= 1) {
409 printf("At least 2 NUMA nodes must be available\n");
412 if (ksm_write_sysfs(KSM_FP("merge_across_nodes"), merge_across_nodes))
415 /* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */
416 first_node = get_first_mem_node();
417 numa1_map_ptr = numa_alloc_onnode(page_size, first_node);
418 numa2_map_ptr = numa_alloc_onnode(page_size, get_next_mem_node(first_node));
419 if (!numa1_map_ptr || !numa2_map_ptr) {
420 perror("numa_alloc_onnode");
424 memset(numa1_map_ptr, '*', page_size);
425 memset(numa2_map_ptr, '*', page_size);
427 /* try to merge the pages */
428 if (ksm_merge_pages(numa1_map_ptr, page_size, start_time, timeout) ||
429 ksm_merge_pages(numa2_map_ptr, page_size, start_time, timeout))
433 * verify that the right number of pages are merged:
434 * 1) if merge_across_nodes was enabled, 2 duplicate pages will be merged;
435 * 2) if merge_across_nodes = 0, there must be 0 merged pages, since there is
436 * only 1 unique page in each node and they can't be shared.
438 if (merge_across_nodes && !assert_ksm_pages_count(page_count))
440 else if (!merge_across_nodes && !assert_ksm_pages_count(0))
443 numa_free(numa1_map_ptr, page_size);
444 numa_free(numa2_map_ptr, page_size);
449 numa_free(numa1_map_ptr, page_size);
450 numa_free(numa2_map_ptr, page_size);
455 static int ksm_merge_hugepages_time(int mapping, int prot, int timeout, size_t map_size)
457 void *map_ptr, *map_ptr_orig;
458 struct timespec start_time, end_time;
459 unsigned long scan_time_ns;
460 int pagemap_fd, n_normal_pages, n_huge_pages;
463 size_t len = map_size;
465 len -= len % HPAGE_SIZE;
466 map_ptr_orig = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
467 MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
468 map_ptr = map_ptr_orig + HPAGE_SIZE - (uintptr_t)map_ptr_orig % HPAGE_SIZE;
470 if (map_ptr_orig == MAP_FAILED)
471 err(2, "initial mmap");
473 if (madvise(map_ptr, len + HPAGE_SIZE, MADV_HUGEPAGE))
474 err(2, "MADV_HUGEPAGE");
476 pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
478 err(2, "open pagemap");
482 for (void *p = map_ptr; p < map_ptr + len; p += HPAGE_SIZE) {
483 if (allocate_transhuge(p, pagemap_fd) < 0)
488 printf("Number of normal pages: %d\n", n_normal_pages);
489 printf("Number of huge pages: %d\n", n_huge_pages);
491 memset(map_ptr, '*', len);
493 if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
494 perror("clock_gettime");
497 if (ksm_merge_pages(map_ptr, map_size, start_time, timeout))
499 if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
500 perror("clock_gettime");
504 scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
505 (end_time.tv_nsec - start_time.tv_nsec);
507 printf("Total size: %lu MiB\n", map_size / MB);
508 printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
509 scan_time_ns % NSEC_PER_SEC);
510 printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
511 ((double)scan_time_ns / NSEC_PER_SEC));
513 munmap(map_ptr_orig, len + HPAGE_SIZE);
518 munmap(map_ptr_orig, len + HPAGE_SIZE);
/*
 * Measure how long KSM needs to merge an area of identical pages.
 *
 * An area of @map_size is allocated through allocate_memory() with every
 * byte set to '*'. The time between the two CLOCK_MONOTONIC_RAW readings
 * around ksm_merge_pages() is reported as total time, together with the
 * size in MiB and the resulting speed. @timeout is the scan limit in
 * seconds. The report divides map_size by MB, so map_size is in bytes at
 * that point; the line that presumably converts it from MiB, the failure
 * branches and the return statements are not shown in this excerpt.
 */
522 static int ksm_merge_time(int mapping, int prot, int timeout, size_t map_size)
525 struct timespec start_time, end_time;
526 unsigned long scan_time_ns;
530 map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
534 if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
535 perror("clock_gettime");
538 if (ksm_merge_pages(map_ptr, map_size, start_time, timeout))
540 if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
541 perror("clock_gettime");
/* Elapsed time in nanoseconds between the two clock readings. */
545 scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
546 (end_time.tv_nsec - start_time.tv_nsec);
548 printf("Total size: %lu MiB\n", map_size / MB);
549 printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
550 scan_time_ns % NSEC_PER_SEC);
/* The size is divided by MB as an integer before the floating-point division. */
551 printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
552 ((double)scan_time_ns / NSEC_PER_SEC));
554 munmap(map_ptr, map_size);
559 munmap(map_ptr, map_size);
/*
 * Compare the cost of writing to pages before and after KSM has merged them.
 *
 * Phase 1 ("Not merged pages"): 4000 pages filled with '*' are mapped and
 * the first byte of every even-numbered page is overwritten while timed.
 * Phase 2 ("Merged pages"): the pages are rewritten so that page i and page
 * i + 1 form an identical pair, KSM is asked to merge the area, and the
 * same single-byte writes to the even-numbered pages are timed again.
 * Both speeds are computed over half of the pages, i.e. the pages written.
 * The failure branches and return statements are not shown in this excerpt.
 */
563 static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size)
566 struct timespec start_time, end_time;
567 unsigned long cow_time_ns;
569 /* page_count must be less than 2*page_size */
570 size_t page_count = 4000;
572 map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
576 if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
577 perror("clock_gettime");
580 for (size_t i = 0; i < page_count - 1; i = i + 2)
581 memset(map_ptr + page_size * i, '-', 1);
582 if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
583 perror("clock_gettime");
587 cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
588 (end_time.tv_nsec - start_time.tv_nsec);
590 printf("Total size: %lu MiB\n\n", (page_size * page_count) / MB);
591 printf("Not merged pages:\n");
592 printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
593 cow_time_ns % NSEC_PER_SEC);
594 printf("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
595 ((double)cow_time_ns / NSEC_PER_SEC));
/*
 * Pair number i / 2 gets a prefix of i / 2 + 1 '+' bytes in both of its
 * pages, so the two pages of a pair match each other but differ from every
 * other pair. The longest prefix is page_count / 2 bytes, which is why
 * page_count is bounded by the page size above.
 */
597 /* Create 2000 pairs of duplicate pages */
598 for (size_t i = 0; i < page_count - 1; i = i + 2) {
599 memset(map_ptr + page_size * i, '+', i / 2 + 1);
600 memset(map_ptr + page_size * (i + 1), '+', i / 2 + 1);
/*
 * NOTE(review): start_time still holds the reading taken before phase 1, so
 * the scan timeout budget here also includes the time spent so far.
 */
602 if (ksm_merge_pages(map_ptr, page_size * page_count, start_time, timeout))
605 if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
606 perror("clock_gettime");
609 for (size_t i = 0; i < page_count - 1; i = i + 2)
610 memset(map_ptr + page_size * i, '-', 1);
611 if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
612 perror("clock_gettime");
616 cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
617 (end_time.tv_nsec - start_time.tv_nsec);
619 printf("Merged pages:\n");
620 printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
621 cow_time_ns % NSEC_PER_SEC);
622 printf("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
623 ((double)cow_time_ns / NSEC_PER_SEC));
625 munmap(map_ptr, page_size * page_count);
630 munmap(map_ptr, page_size * page_count);
/*
 * Entry point: parse the options, save the current KSM tunables, configure
 * KSM for the run, execute the selected test and restore the tunables.
 *
 * Options taking a value are -a, -p, -l, -z, -m and -s; the test is chosen
 * with one of -M, -U, -Z, -N, -P, -C or -H, and -h is also accepted. The
 * default test is CHECK_KSM_MERGE. Several case labels, branch bodies and
 * the final return are not shown in this excerpt.
 */
634 int main(int argc, char *argv[])
638 int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT;
639 long page_count = KSM_PAGE_COUNT_DEFAULT;
640 size_t page_size = sysconf(_SC_PAGESIZE);
641 struct ksm_sysfs ksm_sysfs_old;
642 int test_name = CHECK_KSM_MERGE;
643 bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT;
644 bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
647 while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCH")) != -1) {
650 prot = str_to_prot(optarg);
/* atol()/atoi() yield 0 for non-numeric text, which the "<= 0" checks reject. */
653 page_count = atol(optarg);
654 if (page_count <= 0) {
655 printf("The number of pages must be greater than 0\n");
660 ksm_scan_limit_sec = atoi(optarg);
661 if (ksm_scan_limit_sec <= 0) {
662 printf("Timeout value must be greater than 0\n");
670 if (strcmp(optarg, "0") == 0)
/* Only the exact string "0" is compared against here. */
676 if (strcmp(optarg, "0") == 0)
677 merge_across_nodes = 0;
679 merge_across_nodes = 1;
682 size_MB = atoi(optarg);
684 printf("Size must be greater than 0\n");
690 test_name = CHECK_KSM_UNMERGE;
693 test_name = CHECK_KSM_ZERO_PAGE_MERGE;
696 test_name = CHECK_KSM_NUMA_MERGE;
699 test_name = KSM_MERGE_TIME;
702 test_name = KSM_MERGE_TIME_HUGE_PAGES;
705 test_name = KSM_COW_TIME;
713 prot = str_to_prot(KSM_PROT_STR_DEFAULT);
/* The KSM sysfs directory must exist for any of the tests to run. */
715 if (access(KSM_SYSFS_PATH, F_OK)) {
716 printf("Config KSM not enabled\n");
720 if (ksm_save_def(&ksm_sysfs_old)) {
721 printf("Cannot save default tunables\n");
/*
 * NOTE(review): "||" binds more tightly than "?:", so this condition parses
 * as (run-write || sleep_millisecs-write || numa_available()) ? 0 :
 * (merge_across_nodes-write || pages_to_scan-write). A failed write in the
 * first group, or a non-zero numa_available(), therefore yields 0 and
 * pages_to_scan is never written. Probably unintended -- confirm and
 * parenthesise the conditional.
 */
725 if (ksm_write_sysfs(KSM_FP("run"), 2) ||
726 ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
727 numa_available() ? 0 :
728 ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
729 ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
/* Dispatch on the selected test; every test uses a private anonymous mapping. */
733 case CHECK_KSM_MERGE:
734 ret = check_ksm_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
735 ksm_scan_limit_sec, page_size);
737 case CHECK_KSM_UNMERGE:
738 ret = check_ksm_unmerge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
741 case CHECK_KSM_ZERO_PAGE_MERGE:
742 ret = check_ksm_zero_page_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
743 ksm_scan_limit_sec, use_zero_pages, page_size);
745 case CHECK_KSM_NUMA_MERGE:
746 ret = check_ksm_numa_merge(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
747 merge_across_nodes, page_size);
751 printf("Option '-s' is required.\n");
754 ret = ksm_merge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
757 case KSM_MERGE_TIME_HUGE_PAGES:
759 printf("Option '-s' is required.\n");
762 ret = ksm_merge_hugepages_time(MAP_PRIVATE | MAP_ANONYMOUS, prot,
763 ksm_scan_limit_sec, size_MB);
766 ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
/* Put back the tunables captured by ksm_save_def() above. */
771 if (ksm_restore(&ksm_sysfs_old)) {
772 printf("Cannot restore default tunables\n");