Merge tag 'for-linus' of git://git.armlinux.org.uk/~rmk/linux-arm
[platform/kernel/linux-rpi.git] / tools / testing / selftests / kvm / demand_paging_test.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * KVM demand paging test
4  * Adapted from dirty_log_test.c
5  *
6  * Copyright (C) 2018, Red Hat, Inc.
7  * Copyright (C) 2019, Google, Inc.
8  */
9
10 #define _GNU_SOURCE /* for pipe2 */
11
12 #include <inttypes.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <time.h>
16 #include <poll.h>
17 #include <pthread.h>
18 #include <linux/userfaultfd.h>
19 #include <sys/syscall.h>
20
21 #include "kvm_util.h"
22 #include "test_util.h"
23 #include "perf_test_util.h"
24 #include "guest_modes.h"
25
26 #ifdef __NR_userfaultfd
27
/*
 * Per-page debug output. Expands to printf() only when the test is built with
 * PRINT_PER_PAGE_UPDATES defined; otherwise the arguments are passed to
 * _no_printf() (defined outside this file; presumably discards the output
 * while still checking the format arguments - confirm against test_util.h).
 */
#ifdef PRINT_PER_PAGE_UPDATES
#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
#else
#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
#endif

/*
 * Per-vCPU debug output. Same scheme as PER_PAGE_DEBUG, controlled by
 * PRINT_PER_VCPU_UPDATES.
 */
#ifdef PRINT_PER_VCPU_UPDATES
#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
#else
#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
#endif
39
/* Number of vCPUs to run; defaults to 1 and is overridden by -v in main(). */
static int nr_vcpus = 1;
/* Size of the memory region demand paged by each vCPU; overridden by -b. */
static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
/*
 * Number of bytes resolved per userfault (length of each UFFDIO_COPY /
 * UFFDIO_CONTINUE). Set in run_test() from the backing source's page size.
 */
static size_t demand_paging_size;
/*
 * Buffer of demand_paging_size bytes, filled with 0xAB in run_test(). Used as
 * the UFFDIO_COPY source and to prefault the alias mapping in MINOR mode.
 */
static char *guest_data_prototype;
44
45 static void *vcpu_worker(void *data)
46 {
47         int ret;
48         struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
49         int vcpu_id = vcpu_args->vcpu_id;
50         struct kvm_vm *vm = perf_test_args.vm;
51         struct kvm_run *run;
52         struct timespec start;
53         struct timespec ts_diff;
54
55         run = vcpu_state(vm, vcpu_id);
56
57         clock_gettime(CLOCK_MONOTONIC, &start);
58
59         /* Let the guest access its memory */
60         ret = _vcpu_run(vm, vcpu_id);
61         TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
62         if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) {
63                 TEST_ASSERT(false,
64                             "Invalid guest sync status: exit_reason=%s\n",
65                             exit_reason_str(run->exit_reason));
66         }
67
68         ts_diff = timespec_elapsed(start);
69         PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
70                        ts_diff.tv_sec, ts_diff.tv_nsec);
71
72         return NULL;
73 }
74
75 static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
76 {
77         pid_t tid = syscall(__NR_gettid);
78         struct timespec start;
79         struct timespec ts_diff;
80         int r;
81
82         clock_gettime(CLOCK_MONOTONIC, &start);
83
84         if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
85                 struct uffdio_copy copy;
86
87                 copy.src = (uint64_t)guest_data_prototype;
88                 copy.dst = addr;
89                 copy.len = demand_paging_size;
90                 copy.mode = 0;
91
92                 r = ioctl(uffd, UFFDIO_COPY, &copy);
93                 if (r == -1) {
94                         pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
95                                 addr, tid, errno);
96                         return r;
97                 }
98         } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
99                 struct uffdio_continue cont = {0};
100
101                 cont.range.start = addr;
102                 cont.range.len = demand_paging_size;
103
104                 r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
105                 if (r == -1) {
106                         pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
107                                 addr, tid, errno);
108                         return r;
109                 }
110         } else {
111                 TEST_FAIL("Invalid uffd mode %d", uffd_mode);
112         }
113
114         ts_diff = timespec_elapsed(start);
115
116         PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
117                        timespec_to_ns(ts_diff));
118         PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
119                        demand_paging_size, addr, tid);
120
121         return 0;
122 }
123
/*
 * Loop condition of uffd_handler_thread_fn(). No code visible in this file
 * sets it; the handler threads are told to exit through their pipe instead.
 */
bool quit_uffd_thread;

/* Arguments for one uffd handler thread; filled in by setup_demand_paging(). */
struct uffd_handler_args {
        /* UFFDIO_REGISTER_MODE_* value selecting how faults are resolved. */
        int uffd_mode;
        /* userfaultfd file descriptor the thread polls and reads. */
        int uffd;
        /* Pipe fd polled alongside uffd; a readable byte makes the thread exit. */
        int pipefd;
        /* If non-zero, the thread usleep()s this long before resolving a fault. */
        useconds_t delay;
};
132
133 static void *uffd_handler_thread_fn(void *arg)
134 {
135         struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
136         int uffd = uffd_args->uffd;
137         int pipefd = uffd_args->pipefd;
138         useconds_t delay = uffd_args->delay;
139         int64_t pages = 0;
140         struct timespec start;
141         struct timespec ts_diff;
142
143         clock_gettime(CLOCK_MONOTONIC, &start);
144         while (!quit_uffd_thread) {
145                 struct uffd_msg msg;
146                 struct pollfd pollfd[2];
147                 char tmp_chr;
148                 int r;
149                 uint64_t addr;
150
151                 pollfd[0].fd = uffd;
152                 pollfd[0].events = POLLIN;
153                 pollfd[1].fd = pipefd;
154                 pollfd[1].events = POLLIN;
155
156                 r = poll(pollfd, 2, -1);
157                 switch (r) {
158                 case -1:
159                         pr_info("poll err");
160                         continue;
161                 case 0:
162                         continue;
163                 case 1:
164                         break;
165                 default:
166                         pr_info("Polling uffd returned %d", r);
167                         return NULL;
168                 }
169
170                 if (pollfd[0].revents & POLLERR) {
171                         pr_info("uffd revents has POLLERR");
172                         return NULL;
173                 }
174
175                 if (pollfd[1].revents & POLLIN) {
176                         r = read(pollfd[1].fd, &tmp_chr, 1);
177                         TEST_ASSERT(r == 1,
178                                     "Error reading pipefd in UFFD thread\n");
179                         return NULL;
180                 }
181
182                 if (!(pollfd[0].revents & POLLIN))
183                         continue;
184
185                 r = read(uffd, &msg, sizeof(msg));
186                 if (r == -1) {
187                         if (errno == EAGAIN)
188                                 continue;
189                         pr_info("Read of uffd got errno %d\n", errno);
190                         return NULL;
191                 }
192
193                 if (r != sizeof(msg)) {
194                         pr_info("Read on uffd returned unexpected size: %d bytes", r);
195                         return NULL;
196                 }
197
198                 if (!(msg.event & UFFD_EVENT_PAGEFAULT))
199                         continue;
200
201                 if (delay)
202                         usleep(delay);
203                 addr =  msg.arg.pagefault.address;
204                 r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
205                 if (r < 0)
206                         return NULL;
207                 pages++;
208         }
209
210         ts_diff = timespec_elapsed(start);
211         PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
212                        pages, ts_diff.tv_sec, ts_diff.tv_nsec,
213                        pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
214
215         return NULL;
216 }
217
218 static void setup_demand_paging(struct kvm_vm *vm,
219                                 pthread_t *uffd_handler_thread, int pipefd,
220                                 int uffd_mode, useconds_t uffd_delay,
221                                 struct uffd_handler_args *uffd_args,
222                                 void *hva, void *alias, uint64_t len)
223 {
224         bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
225         int uffd;
226         struct uffdio_api uffdio_api;
227         struct uffdio_register uffdio_register;
228         uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
229
230         PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
231                        is_minor ? "MINOR" : "MISSING",
232                        is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
233
234         /* In order to get minor faults, prefault via the alias. */
235         if (is_minor) {
236                 size_t p;
237
238                 expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
239
240                 TEST_ASSERT(alias != NULL, "Alias required for minor faults");
241                 for (p = 0; p < (len / demand_paging_size); ++p) {
242                         memcpy(alias + (p * demand_paging_size),
243                                guest_data_prototype, demand_paging_size);
244                 }
245         }
246
247         uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
248         TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
249
250         uffdio_api.api = UFFD_API;
251         uffdio_api.features = 0;
252         TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
253                     "ioctl UFFDIO_API failed: %" PRIu64,
254                     (uint64_t)uffdio_api.api);
255
256         uffdio_register.range.start = (uint64_t)hva;
257         uffdio_register.range.len = len;
258         uffdio_register.mode = uffd_mode;
259         TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
260                     "ioctl UFFDIO_REGISTER failed");
261         TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
262                     expected_ioctls, "missing userfaultfd ioctls");
263
264         uffd_args->uffd_mode = uffd_mode;
265         uffd_args->uffd = uffd;
266         uffd_args->pipefd = pipefd;
267         uffd_args->delay = uffd_delay;
268         pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
269                        uffd_args);
270
271         PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
272                        hva, hva + len);
273 }
274
/* Command-line configuration handed to run_test() for every guest mode. */
struct test_params {
        /* UFFDIO_REGISTER_MODE_* chosen with -u; 0 means userfaultfd is not used. */
        int uffd_mode;
        /* Delay in microseconds set with -d and passed to the uffd handler threads. */
        useconds_t uffd_delay;
        /* Backing source for guest memory, chosen with -s. */
        enum vm_mem_backing_src_type src_type;
        /* True by default; -o clears it so vCPUs overlap instead of using separate regions. */
        bool partition_vcpu_memory_access;
};
281
282 static void run_test(enum vm_guest_mode mode, void *arg)
283 {
284         struct test_params *p = arg;
285         pthread_t *vcpu_threads;
286         pthread_t *uffd_handler_threads = NULL;
287         struct uffd_handler_args *uffd_args = NULL;
288         struct timespec start;
289         struct timespec ts_diff;
290         int *pipefds = NULL;
291         struct kvm_vm *vm;
292         int vcpu_id;
293         int r;
294
295         vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
296                                  p->src_type);
297
298         perf_test_args.wr_fract = 1;
299
300         demand_paging_size = get_backing_src_pagesz(p->src_type);
301
302         guest_data_prototype = malloc(demand_paging_size);
303         TEST_ASSERT(guest_data_prototype,
304                     "Failed to allocate buffer for guest data pattern");
305         memset(guest_data_prototype, 0xAB, demand_paging_size);
306
307         vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
308         TEST_ASSERT(vcpu_threads, "Memory allocation failed");
309
310         perf_test_setup_vcpus(vm, nr_vcpus, guest_percpu_mem_size,
311                               p->partition_vcpu_memory_access);
312
313         if (p->uffd_mode) {
314                 uffd_handler_threads =
315                         malloc(nr_vcpus * sizeof(*uffd_handler_threads));
316                 TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
317
318                 uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
319                 TEST_ASSERT(uffd_args, "Memory allocation failed");
320
321                 pipefds = malloc(sizeof(int) * nr_vcpus * 2);
322                 TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
323
324                 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
325                         vm_paddr_t vcpu_gpa;
326                         void *vcpu_hva;
327                         void *vcpu_alias;
328                         uint64_t vcpu_mem_size;
329
330
331                         if (p->partition_vcpu_memory_access) {
332                                 vcpu_gpa = guest_test_phys_mem +
333                                            (vcpu_id * guest_percpu_mem_size);
334                                 vcpu_mem_size = guest_percpu_mem_size;
335                         } else {
336                                 vcpu_gpa = guest_test_phys_mem;
337                                 vcpu_mem_size = guest_percpu_mem_size * nr_vcpus;
338                         }
339                         PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
340                                        vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_mem_size);
341
342                         /* Cache the host addresses of the region */
343                         vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
344                         vcpu_alias = addr_gpa2alias(vm, vcpu_gpa);
345
346                         /*
347                          * Set up user fault fd to handle demand paging
348                          * requests.
349                          */
350                         r = pipe2(&pipefds[vcpu_id * 2],
351                                   O_CLOEXEC | O_NONBLOCK);
352                         TEST_ASSERT(!r, "Failed to set up pipefd");
353
354                         setup_demand_paging(vm, &uffd_handler_threads[vcpu_id],
355                                             pipefds[vcpu_id * 2], p->uffd_mode,
356                                             p->uffd_delay, &uffd_args[vcpu_id],
357                                             vcpu_hva, vcpu_alias,
358                                             vcpu_mem_size);
359                 }
360         }
361
362         /* Export the shared variables to the guest */
363         sync_global_to_guest(vm, perf_test_args);
364
365         pr_info("Finished creating vCPUs and starting uffd threads\n");
366
367         clock_gettime(CLOCK_MONOTONIC, &start);
368
369         for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
370                 pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
371                                &perf_test_args.vcpu_args[vcpu_id]);
372         }
373
374         pr_info("Started all vCPUs\n");
375
376         /* Wait for the vcpu threads to quit */
377         for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
378                 pthread_join(vcpu_threads[vcpu_id], NULL);
379                 PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
380         }
381
382         ts_diff = timespec_elapsed(start);
383
384         pr_info("All vCPU threads joined\n");
385
386         if (p->uffd_mode) {
387                 char c;
388
389                 /* Tell the user fault fd handler threads to quit */
390                 for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
391                         r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
392                         TEST_ASSERT(r == 1, "Unable to write to pipefd");
393
394                         pthread_join(uffd_handler_threads[vcpu_id], NULL);
395                 }
396         }
397
398         pr_info("Total guest execution time: %ld.%.9lds\n",
399                 ts_diff.tv_sec, ts_diff.tv_nsec);
400         pr_info("Overall demand paging rate: %f pgs/sec\n",
401                 perf_test_args.vcpu_args[0].pages * nr_vcpus /
402                 ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
403
404         perf_test_destroy_vm(vm);
405
406         free(guest_data_prototype);
407         free(vcpu_threads);
408         if (p->uffd_mode) {
409                 free(uffd_handler_threads);
410                 free(uffd_args);
411                 free(pipefds);
412         }
413 }
414
/*
 * Print the command-line usage text to stdout and terminate the process with
 * exit status 0.
 *
 * @name: program name shown in the usage line.
 */
static void help(char *name)
{
        puts("");
        printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
               "          [-b memory] [-s type] [-v vcpus] [-o]\n", name);

        /* The -m description comes from the shared guest-modes helper. */
        guest_modes_help();

        fputs(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
              "     UFFD registration mode: 'MISSING' or 'MINOR'.\n", stdout);
        fputs(" -d: add a delay in usec to the User Fault\n"
              "     FD handler to simulate demand paging\n"
              "     overheads. Ignored without -u.\n", stdout);
        fputs(" -b: specify the size of the memory region which should be\n"
              "     demand paged by each vCPU. e.g. 10M or 3G.\n"
              "     Default: 1G\n", stdout);

        /* The -s description comes from the shared backing-source helper. */
        backing_src_help("-s");

        fputs(" -v: specify the number of vCPUs to run.\n", stdout);
        fputs(" -o: Overlap guest memory accesses instead of partitioning\n"
              "     them into a separate region of memory for each vCPU.\n",
              stdout);
        puts("");

        exit(0);
}
436
437 int main(int argc, char *argv[])
438 {
439         int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
440         struct test_params p = {
441                 .src_type = DEFAULT_VM_MEM_SRC,
442                 .partition_vcpu_memory_access = true,
443         };
444         int opt;
445
446         guest_modes_append_default();
447
448         while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
449                 switch (opt) {
450                 case 'm':
451                         guest_modes_cmdline(optarg);
452                         break;
453                 case 'u':
454                         if (!strcmp("MISSING", optarg))
455                                 p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
456                         else if (!strcmp("MINOR", optarg))
457                                 p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
458                         TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
459                         break;
460                 case 'd':
461                         p.uffd_delay = strtoul(optarg, NULL, 0);
462                         TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
463                         break;
464                 case 'b':
465                         guest_percpu_mem_size = parse_size(optarg);
466                         break;
467                 case 's':
468                         p.src_type = parse_backing_src_type(optarg);
469                         break;
470                 case 'v':
471                         nr_vcpus = atoi(optarg);
472                         TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
473                                     "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
474                         break;
475                 case 'o':
476                         p.partition_vcpu_memory_access = false;
477                         break;
478                 case 'h':
479                 default:
480                         help(argv[0]);
481                         break;
482                 }
483         }
484
485         if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
486             !backing_src_is_shared(p.src_type)) {
487                 TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
488         }
489
490         for_each_guest_mode(run_test, &p);
491
492         return 0;
493 }
494
495 #else /* __NR_userfaultfd */
496
497 #warning "missing __NR_userfaultfd definition"
498
/*
 * Fallback entry point compiled when __NR_userfaultfd is not defined: report
 * that the test is skipped and return the kselftest skip status.
 */
int main(void)
{
        print_skip("__NR_userfaultfd must be present for userfaultfd test");
        return KSFT_SKIP;
}
504
505 #endif /* __NR_userfaultfd */