2 * This application is Copyright 2012 Red Hat, Inc.
3 * Doug Ledford <dledford@redhat.com>
5 * mq_perf_tests is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, version 3.
9 * mq_perf_tests is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * For the full text of the license, see <http://www.gnu.org/licenses/>.
17 * Tests various types of message queue workloads, concentrating on those
18 * situations that involve large message sizes, large message queue depths,
19 * or both, and reports back useful metrics about kernel message queue
34 #include <sys/types.h>
36 #include <sys/resource.h>
42 #include "../kselftest.h"
46 " %s [-c #[,#..] -f] path\n"
48 " -c # Skip most tests and go straight to a high queue depth test\n"
49 " and then run that test continuously (useful for running at\n"
50 " the same time as some other workload to see how much the\n"
51 " cache thrashing caused by adding messages to a very deep\n"
52 " queue impacts the performance of other programs). The number\n"
53 " indicates which CPU core we should bind the process to during\n"
54 " the run. If you have more than one physical CPU, then you\n"
55 " will need one copy per physical CPU package, and you should\n"
56 " specify the CPU cores to pin ourself to via a comma separated\n"
57 " list of CPU values.\n"
58 " -f Only usable with continuous mode. Pin ourself to the CPUs\n"
59 " as requested, then instead of looping doing a high mq\n"
60 " workload, just busy loop. This will allow us to lock up a\n"
61 " single CPU just like we normally would, but without actually\n"
62 " thrashing the CPU cache. This is to make it easier to get\n"
63 " comparable numbers from some other workload running on the\n"
64 " other CPUs. One set of numbers with # CPUs locked up running\n"
65 " an mq workload, and another set of numbers with those same\n"
66 " CPUs locked away from the test workload, but not doing\n"
67 " anything to trash the cache like the mq workload might.\n"
68 " path Path name of the message queue to create\n"
70 " Note: this program must be run as root in order to enable all tests\n"
/* Paths of the /proc files that hold the system-wide mqueue limits. */
73 char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
74 char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
/* NOTE: evaluates each argument more than once; avoid side effects. */
76 #define min(a, b) ((a) < (b) ? (a) : (b))
/* Raw string argument of the "continuous" option (see options[]). */
78 char *cpu_option_string;
/* CPUs selected for pinning, and the thread created for each of them. */
79 int cpus_to_pin[MAX_CPUS];
81 pthread_t cpu_threads[MAX_CPUS];
/* Thread that ran main(); the signal handlers compare against it. */
82 pthread_t main_thread;
/* Iteration counts for test #1 and for each test #2 variant. */
88 #define TEST1_LOOPS 10000000
89 #define TEST2_LOOPS 100000
/* Target of the option table entry that uses POPT_ARG_NONE. */
91 int continuous_mode_fake;
/*
 * saved_* hold the values read at startup (shutdown() writes the saved
 * msg limits back); cur_* are the working values used while testing.
 */
93 struct rlimit saved_limits, cur_limits;
94 int saved_max_msgs, saved_max_msgsize;
95 int cur_max_msgs, cur_max_msgsize;
/* Streams opened by main() on MAX_MSGS and MAX_MSGSIZE. */
96 FILE *max_msgs, *max_msgsize;
/* Default queue name. */
98 char *queue_path = "/mq_perf_tests";
/* Attributes reported by mq_getattr() in open_queue(). */
100 struct mq_attr result;
/*
 * popt option table: the "continuous" option stores its string argument in
 * cpu_option_string, the POPT_ARG_NONE entry sets continuous_mode_fake, and
 * the last visible entry takes a queue pathname.
 *
 * The help texts are built from adjacent string literals, which the
 * compiler concatenates verbatim; every fragment that is followed by
 * another one must therefore end in a space.
 */
103 const struct poptOption options[] = {
105 .longName = "continuous",
107 .argInfo = POPT_ARG_STRING,
108 .arg = &cpu_option_string,
110 .descrip = "Run continuous tests at a high queue depth in "
111 "order to test the effects of cache thrashing on "
112 "other tasks on the system. This test is intended "
113 "to be run on one core of each physical CPU while "
114 "some other CPU intensive task is run on all the other "
115 "cores of that same physical CPU and the other task "
116 "is timed. It is assumed that the process of adding "
117 "messages to the message queue in a tight loop will "
118 "impact that other task to some degree. Once the "
119 "tests are performed in this way, you should then "
120 "re-run the tests using fake mode in order to check "
121 "the difference in time required to perform the CPU "
123 .argDescrip = "cpu[,cpu]",
128 .argInfo = POPT_ARG_NONE,
129 .arg = &continuous_mode_fake,
131 .descrip = "Tie up the CPUs that we would normally tie up in "
132 "continuous mode, but don't actually do any mq stuff, "
133 "just keep the CPU busy so it can't be used to process "
134 "system level tasks as this would free up resources on "
135 "the other CPU cores and skew the comparison between "
136 "the no-mqueue work and mqueue work tests",
142 .argInfo = POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT,
145 .descrip = "The name of the path to use in the mqueue "
146 "filesystem for our tests",
147 .argDescrip = "pathname",
153 static inline void __set(FILE *stream, int value, char *err_msg);
154 void shutdown(int exit_val, char *err_cause, int line_no);
155 void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context);
156 void sig_action(int signum, siginfo_t *info, void *context);
157 static inline int get(FILE *stream);
158 static inline void set(FILE *stream, int value);
159 static inline int try_set(FILE *stream, int value);
160 static inline void getr(int type, struct rlimit *rlim);
161 static inline void setr(int type, struct rlimit *rlim);
162 static inline void open_queue(struct mq_attr *attr);
163 void increase_limits(void);
/*
 * __set - write an integer to a stream without the verification set() does
 * @stream: destination stream (shutdown() passes max_msgs / max_msgsize)
 * @value: number written in decimal
 * @err_msg: text describing the failure
 *
 * Unlike set(), the visible code never calls shutdown(), which is what lets
 * shutdown() itself use this helper.
 * NOTE(review): the statement run when fprintf() fails is not visible here;
 * presumably it reports err_msg - confirm.
 */
165 static inline void __set(FILE *stream, int value, char *err_msg)
168 if (fprintf(stream, "%d", value) < 0)
/*
 * shutdown - tear the test down and report why
 * @exit_val: status handed to error()
 * @err_cause: short description printed in the final message
 * @line_no: source line of the caller, printed next to @err_cause
 *
 * errno is captured on entry because the cleanup calls below may overwrite
 * it. Every started worker thread is sent SIGUSR1 and then joined, the
 * queue name is unlinked, and the saved limits are written back with
 * __set(), which never re-enters shutdown(). The result is reported
 * through error() using the errno value captured at entry.
 */
173 void shutdown(int exit_val, char *err_cause, int line_no)
175 static int in_shutdown = 0;
176 int errno_at_shutdown = errno;
179 /* In case we get called by multiple threads or from a signal handler */
183 /* Free the cpu_set allocated using CPU_ALLOC in main function */
186 for (i = 0; i < num_cpus_to_pin; i++)
187 if (cpu_threads[i]) {
188 pthread_kill(cpu_threads[i], SIGUSR1);
189 pthread_join(cpu_threads[i], NULL);
194 perror("mq_close() during shutdown");
197 * Be silent if this fails, if we cleaned up already it's
200 mq_unlink(queue_path);
202 __set(max_msgs, saved_max_msgs,
203 "failed to restore saved_max_msgs");
204 if (saved_max_msgsize)
205 __set(max_msgsize, saved_max_msgsize,
206 "failed to restore saved_max_msgsize");
208 error(exit_val, errno_at_shutdown, "%s at %d",
/*
 * sig_action_SIGUSR1 - SIGUSR1 handler (SA_SIGINFO signature)
 * @signum: signal number, echoed in the message
 * @info: unused in the visible lines
 * @context: unused in the visible lines
 *
 * shutdown() sends SIGUSR1 to each worker thread before joining it, so the
 * handler first checks whether it is running on a worker or on main_thread.
 * NOTE(review): the worker branch is not visible here; presumably the worker
 * exits - confirm. The trailing message is only reached if shutdown()
 * returns.
 * NOTE(review): fprintf() is not async-signal-safe.
 */
213 void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context)
215 if (pthread_self() != main_thread)
218 fprintf(stderr, "Caught signal %d in SIGUSR1 handler, "
219 "exiting\n", signum);
221 fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
/*
 * sig_action - handler installed by main() for SIGHUP, SIGINT, SIGQUIT and
 * SIGTERM
 * @signum: signal number, forwarded or echoed in the message
 * @info: unused in the visible lines
 * @context: unused in the visible lines
 *
 * A thread other than main_thread forwards the signal to main_thread with
 * pthread_kill(); the main-thread path logs the signal. The trailing
 * message is only reached if shutdown() returns.
 * NOTE(review): fprintf() is not async-signal-safe.
 */
226 void sig_action(int signum, siginfo_t *info, void *context)
228 if (pthread_self() != main_thread)
229 pthread_kill(main_thread, signum);
231 fprintf(stderr, "Caught signal %d, exiting\n", signum);
233 fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
/*
 * get - read one decimal integer from a stream
 * @stream: stream to read from (callers pass max_msgs / max_msgsize)
 *
 * Calls shutdown(4, ...) when fscanf() does not convert exactly one value.
 * NOTE(review): the return statement is not visible here; callers use the
 * result as the value that was read.
 */
238 static inline int get(FILE *stream)
242 if (fscanf(stream, "%d", &value) != 1)
243 shutdown(4, "Error reading /proc entry", __LINE__);
/*
 * set - write an integer to a stream and verify that it stuck
 * @stream: stream to write to and read back from
 * @value: number to write in decimal
 *
 * Calls shutdown(5, ...) if the write fails or if the value read back with
 * get() differs from @value.
 * NOTE(review): "return shutdown(...)" returns a void expression from a
 * void function; ISO C does not allow that, compilers accept it as an
 * extension.
 */
247 static inline void set(FILE *stream, int value)
252 if (fprintf(stream, "%d", value) < 0)
253 return shutdown(5, "Failed writing to /proc file", __LINE__);
254 new_value = get(stream);
255 if (new_value != value)
256 return shutdown(5, "We didn't get what we wrote to /proc back",
/*
 * try_set - attempt to write an integer and report whether it was accepted
 * @stream: stream to write to and read back from
 * @value: number to write in decimal
 *
 * The fprintf() result is ignored on purpose: success is judged only by
 * reading the value back with get().
 *
 * Returns non-zero when the value read back equals @value, 0 otherwise.
 */
260 static inline int try_set(FILE *stream, int value)
265 fprintf(stream, "%d", value);
266 new_value = get(stream);
267 return new_value == value;
/*
 * getr - getrlimit() wrapper
 * @type: resource passed to getrlimit()
 * @rlim: receives the limits
 *
 * Calls shutdown(6, ...) when getrlimit() returns non-zero.
 */
270 static inline void getr(int type, struct rlimit *rlim)
272 if (getrlimit(type, rlim))
273 shutdown(6, "getrlimit()", __LINE__);
/*
 * setr - setrlimit() wrapper
 * @type: resource passed to setrlimit()
 * @rlim: limits to install
 *
 * Calls shutdown(7, ...) when setrlimit() returns non-zero.
 */
276 static inline void setr(int type, struct rlimit *rlim)
278 if (setrlimit(type, rlim))
279 shutdown(7, "setrlimit()", __LINE__);
283 * open_queue - open the global queue for testing
284 * @attr - An attr struct specifying the desired queue traits
285 * The traits the queue actually has are stored in the global result
287 * This open is not allowed to fail, failure will result in an orderly
288 * shutdown of the program. The global queue_path is used to set what
289 * queue to open, the queue descriptor is saved in the global queue
292 static inline void open_queue(struct mq_attr *attr)
/* O_CREAT | O_EXCL: mq_open() fails if a queue of this name already exists. */
294 int flags = O_RDWR | O_EXCL | O_CREAT | O_NONBLOCK;
295 int perms = DEFFILEMODE;
297 queue = mq_open(queue_path, flags, perms, attr);
299 shutdown(1, "mq_open()", __LINE__);
/* Read back what was really created; the report below prints these values. */
300 if (mq_getattr(queue, &result))
301 shutdown(1, "mq_getattr()", __LINE__);
302 printf("\n\tQueue %s created:\n", queue_path);
303 printf("\t\tmq_flags:\t\t\t%s\n", result.mq_flags & O_NONBLOCK ?
304 "O_NONBLOCK" : "(null)");
/* NOTE(review): POSIX declares the mq_attr members as long; %ld would match. */
305 printf("\t\tmq_maxmsg:\t\t\t%lu\n", result.mq_maxmsg);
306 printf("\t\tmq_msgsize:\t\t\t%lu\n", result.mq_msgsize);
307 printf("\t\tmq_curmsgs:\t\t\t%lu\n", result.mq_curmsgs);
/*
 * fake_cont_thread - thread entry point used when continuous_mode_fake is set
 * @arg: not referenced in the visible lines
 *
 * Locates its own slot by comparing pthread_self() with the entries of
 * cpu_threads[] and announces itself. The rest of the body is not visible
 * here.
 * NOTE(review): POSIX provides pthread_equal() for comparing thread IDs;
 * "==" on pthread_t is not portable.
 */
310 void *fake_cont_thread(void *arg)
314 for (i = 0; i < num_cpus_to_pin; i++)
315 if (cpu_threads[i] == pthread_self())
317 printf("\tStarted fake continuous mode thread %d on CPU %d\n", i,
/*
 * cont_thread - thread entry point for (real) continuous mode
 * @arg: not referenced in the visible lines
 *
 * Locates its own slot in cpu_threads[] the same way fake_cont_thread()
 * does, then keeps sending priority-0 messages of sizeof(buff) bytes for as
 * long as mq_send() succeeds and receives a message afterwards.
 * NOTE(review): the enclosing loop is not visible here; presumably the
 * send/receive pair repeats forever - confirm.
 */
323 void *cont_thread(void *arg)
328 for (i = 0; i < num_cpus_to_pin; i++)
329 if (cpu_threads[i] == pthread_self())
331 printf("\tStarted continuous mode thread %d on CPU %d\n", i,
334 while (mq_send(queue, buff, sizeof(buff), 0) == 0)
336 mq_receive(queue, buff, sizeof(buff), &priority);
/*
 * Helper macros for the timed tests. They are not self-contained: they use
 * names from the expansion site (queue, buff, prio_in, prio_out, clock,
 * start, middle, end, nsec, send_total, recv_total).
 *
 * drain_queue()     - receive until mq_receive() stops returning MSG_SIZE.
 * do_untimed_send() - send one MSG_SIZE message; shutdown(3, ...) on failure.
 * do_send_recv()    - send and then receive one message, sampling the clock
 *                     before, between and after the two calls. The two
 *                     elapsed times are added to send_total and recv_total.
 *                     Each update carries at most one second from tv_nsec
 *                     into tv_sec, so a single operation is assumed to take
 *                     less than one second.
 */
340 #define drain_queue() \
341 while (mq_receive(queue, buff, MSG_SIZE, &prio_in) == MSG_SIZE)
343 #define do_untimed_send() \
345 if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
346 shutdown(3, "Test send failure", __LINE__); \
349 #define do_send_recv() \
351 clock_gettime(clock, &start); \
352 if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
353 shutdown(3, "Test send failure", __LINE__); \
354 clock_gettime(clock, &middle); \
355 if (mq_receive(queue, buff, MSG_SIZE, &prio_in) != MSG_SIZE) \
356 shutdown(3, "Test receive failure", __LINE__); \
357 clock_gettime(clock, &end); \
358 nsec = ((middle.tv_sec - start.tv_sec) * 1000000000) + \
359 (middle.tv_nsec - start.tv_nsec); \
360 send_total.tv_nsec += nsec; \
361 if (send_total.tv_nsec >= 1000000000) { \
362 send_total.tv_sec++; \
363 send_total.tv_nsec -= 1000000000; \
365 nsec = ((end.tv_sec - middle.tv_sec) * 1000000000) + \
366 (end.tv_nsec - middle.tv_nsec); \
367 recv_total.tv_nsec += nsec; \
368 if (recv_total.tv_nsec >= 1000000000) { \
369 recv_total.tv_sec++; \
370 recv_total.tv_nsec -= 1000000000; \
/*
 * const_prio - priority callback with the signature used by the test2[] table
 * NOTE(review): the body is not visible here; going by the name it
 * presumably leaves *prio unchanged - confirm.
 */
379 void const_prio(int *prio)
/*
 * inc_prio - priority callback that pre-increments *prio
 * NOTE(review): the statement run when the new value equals mq_prio_max is
 * not visible here; presumably it wraps to 0 - confirm.
 */
384 void inc_prio(int *prio)
386 if (++*prio == mq_prio_max)
/*
 * dec_prio - priority callback counterpart of inc_prio()
 * The visible statement resets *prio to the highest valid priority,
 * mq_prio_max - 1.
 * NOTE(review): the condition guarding that reset is not visible here;
 * presumably it fires when a decrement goes below 0 - confirm.
 */
390 void dec_prio(int *prio)
393 *prio = mq_prio_max - 1;
/*
 * random_prio - priority callback that stores random() % mq_prio_max in
 * *prio, i.e. a value from 0 to mq_prio_max - 1 for a positive mq_prio_max.
 */
396 void random_prio(int *prio)
398 *prio = random() % mq_prio_max;
/*
 * Variants of test #2. perf_test_thread() walks this table until it reaches
 * an entry whose desc is NULL, printing desc and calling func(&prio_out)
 * after each send. Every description already ends with a newline.
 */
401 struct test test2[] = {
402 {"\n\tTest #2a: Time send/recv message, queue full, constant prio\n",
404 {"\n\tTest #2b: Time send/recv message, queue full, increasing prio\n",
406 {"\n\tTest #2c: Time send/recv message, queue full, decreasing prio\n",
408 {"\n\tTest #2d: Time send/recv message, queue full, random prio\n",
414 * Tests to perform (all done with MSG_SIZE messages):
416 * 1) Time to add/remove message with 0 messages on queue
417 * 1a) with constant prio
418 * 2) Time to add/remove message when queue close to capacity:
419 * 2a) with constant prio
420 * 2b) with increasing prio
421 * 2c) with decreasing prio
422 * 2d) with random prio
423 * 3) Test limits of priorities honored (double check _SC_MQ_PRIO_MAX)
/*
 * perf_test_thread - thread entry point that runs the timed mqueue tests
 * @arg: not referenced in the visible lines
 *
 * Test #1 times TEST1_LOOPS send/receive pairs. Each test2[] entry then
 * fills the queue to mq_maxmsg - 1 messages, times TEST2_LOOPS send/receive
 * pairs with the priority updated by the entry's func, and drains the queue.
 * Any failing setup call ends the program through shutdown(2, ...).
 *
 * Output formatting:
 *  - Totals are printed as seconds.nanoseconds, so tv_nsec is zero-padded
 *    to nine digits; without the padding 5 ms would read as "0.5000000s".
 *  - tv_nsec is a long and nsec an unsigned long long; the conversions
 *    match those types.
 *  - Each test2[] description already ends with a newline, so it is printed
 *    as is instead of being followed by a line holding only ":".
 * NOTE(review): the "done." lines print nsec / 1000000000 with
 * "%lld.%llds"; their second argument is not visible here. If it is
 * nsec % 1000000000 it needs the same nine-digit padding - confirm.
 */
425 void *perf_test_thread(void *arg)
428 int prio_out, prio_in;
432 struct timespec res, start, middle, end, send_total, recv_total;
433 unsigned long long nsec;
434 struct test *cur_test;
437 printf("\n\tStarted mqueue performance test thread on CPU %d\n",
439 mq_prio_max = sysconf(_SC_MQ_PRIO_MAX);
440 if (mq_prio_max == -1)
441 shutdown(2, "sysconf(_SC_MQ_PRIO_MAX)", __LINE__);
/* All timings below use the CPU-time clock of cpu_threads[0]. */
442 if (pthread_getcpuclockid(cpu_threads[0], &clock) != 0)
443 shutdown(2, "pthread_getcpuclockid", __LINE__);
445 if (clock_getres(clock, &res))
446 shutdown(2, "clock_getres()", __LINE__);
448 printf("\t\tMax priorities:\t\t\t%d\n", mq_prio_max);
449 printf("\t\tClock resolution:\t\t%ld nsec%s\n", res.tv_nsec,
450 res.tv_nsec > 1 ? "s" : "");
454 printf("\n\tTest #1: Time send/recv message, queue empty\n");
455 printf("\t\t(%d iterations)\n", TEST1_LOOPS);
457 send_total.tv_sec = 0;
458 send_total.tv_nsec = 0;
459 recv_total.tv_sec = 0;
460 recv_total.tv_nsec = 0;
461 for (i = 0; i < TEST1_LOOPS; i++)
463 printf("\t\tSend msg:\t\t\t%ld.%09lds total time\n",
464 send_total.tv_sec, send_total.tv_nsec);
465 nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
466 send_total.tv_nsec) / TEST1_LOOPS;
467 printf("\t\t\t\t\t\t%llu nsec/msg\n", nsec);
468 printf("\t\tRecv msg:\t\t\t%ld.%09lds total time\n",
469 recv_total.tv_sec, recv_total.tv_nsec);
470 nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
471 recv_total.tv_nsec) / TEST1_LOOPS;
472 printf("\t\t\t\t\t\t%llu nsec/msg\n", nsec);
475 for (cur_test = test2; cur_test->desc != NULL; cur_test++) {
476 printf("%s", cur_test->desc);
477 printf("\t\t(%d iterations)\n", TEST2_LOOPS);
479 send_total.tv_sec = 0;
480 send_total.tv_nsec = 0;
481 recv_total.tv_sec = 0;
482 recv_total.tv_nsec = 0;
483 printf("\t\tFilling queue...");
485 clock_gettime(clock, &start);
/* Fill to one message short of the capacity reported by mq_getattr(). */
486 for (i = 0; i < result.mq_maxmsg - 1; i++) {
488 cur_test->func(&prio_out);
490 clock_gettime(clock, &end);
491 nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
492 1000000000) + (end.tv_nsec - start.tv_nsec);
493 printf("done.\t\t%lld.%llds\n", nsec / 1000000000,
495 printf("\t\tTesting...");
497 for (i = 0; i < TEST2_LOOPS; i++) {
499 cur_test->func(&prio_out);
502 printf("\t\tSend msg:\t\t\t%ld.%09lds total time\n",
503 send_total.tv_sec, send_total.tv_nsec);
504 nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
505 send_total.tv_nsec) / TEST2_LOOPS;
506 printf("\t\t\t\t\t\t%llu nsec/msg\n", nsec);
507 printf("\t\tRecv msg:\t\t\t%ld.%09lds total time\n",
508 recv_total.tv_sec, recv_total.tv_nsec);
509 nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
510 recv_total.tv_nsec) / TEST2_LOOPS;
511 printf("\t\t\t\t\t\t%llu nsec/msg\n", nsec);
512 printf("\t\tDraining queue...");
514 clock_gettime(clock, &start);
516 clock_gettime(clock, &end);
517 nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
518 1000000000) + (end.tv_nsec - start.tv_nsec);
519 printf("done.\t\t%lld.%llds\n", nsec / 1000000000,
/*
 * increase_limits - raise the system limits as far as the kernel allows
 *
 * Sets RLIMIT_MSGQUEUE to RLIM_INFINITY (soft and hard), then probes
 * msg_max upward in steps of 10 and msgsize_max in steps of 1024 until
 * try_set() reports that a value was not accepted. After each probe the
 * value actually in effect is re-read with get(), because the last
 * attempted value was rejected. Finally the process priority is set to
 * -20; failure there ends the program through shutdown(2, ...).
 */
525 void increase_limits(void)
527 cur_limits.rlim_cur = RLIM_INFINITY;
528 cur_limits.rlim_max = RLIM_INFINITY;
529 setr(RLIMIT_MSGQUEUE, &cur_limits);
530 while (try_set(max_msgs, cur_max_msgs += 10))
532 cur_max_msgs = get(max_msgs);
533 while (try_set(max_msgsize, cur_max_msgsize += 1024))
535 cur_max_msgsize = get(max_msgsize);
536 if (setpriority(PRIO_PROCESS, 0, -20) != 0)
537 shutdown(2, "setpriority()", __LINE__);
541 int main(int argc, char *argv[])
544 char *option, *next_option;
547 poptContext popt_context;
550 main_thread = pthread_self();
553 if (sysconf(_SC_NPROCESSORS_ONLN) == -1) {
554 perror("sysconf(_SC_NPROCESSORS_ONLN)");
559 ksft_exit_skip("Not running as root, but almost all tests "
560 "require root in order to modify\nsystem settings. "
563 cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
564 cpu_set = CPU_ALLOC(cpus_online);
565 if (cpu_set == NULL) {
566 perror("CPU_ALLOC()");
569 cpu_set_size = CPU_ALLOC_SIZE(cpus_online);
570 CPU_ZERO_S(cpu_set_size, cpu_set);
572 popt_context = poptGetContext(NULL, argc, (const char **)argv,
575 while ((rc = poptGetNextOpt(popt_context)) > 0) {
579 option = cpu_option_string;
581 next_option = strchr(option, ',');
585 if (cpu >= cpus_online)
586 fprintf(stderr, "CPU %d exceeds "
587 "cpus online, ignoring.\n",
590 cpus_to_pin[num_cpus_to_pin++] = cpu;
592 option = ++next_option;
593 } while (next_option && num_cpus_to_pin < MAX_CPUS);
594 /* Double check that they didn't give us the same CPU
596 for (cpu = 0; cpu < num_cpus_to_pin; cpu++) {
597 if (CPU_ISSET_S(cpus_to_pin[cpu], cpu_set_size,
599 fprintf(stderr, "Any given CPU may "
600 "only be given once.\n");
603 CPU_SET_S(cpus_to_pin[cpu],
604 cpu_set_size, cpu_set);
609 * Although we can create a msg queue with a
610 * non-absolute path name, unlink will fail. So,
611 * if the name doesn't start with a /, add one
615 if (*option != '/') {
616 queue_path = malloc(strlen(option) + 2);
623 strcat(queue_path, option);
630 if (continuous_mode && num_cpus_to_pin == 0) {
631 fprintf(stderr, "Must pass at least one CPU to continuous "
633 poptPrintUsage(popt_context, stderr, 0);
635 } else if (!continuous_mode) {
637 cpus_to_pin[0] = cpus_online - 1;
640 max_msgs = fopen(MAX_MSGS, "r+");
641 max_msgsize = fopen(MAX_MSGSIZE, "r+");
643 shutdown(2, "Failed to open msg_max", __LINE__);
645 shutdown(2, "Failed to open msgsize_max", __LINE__);
647 /* Load up the current system values for everything we can */
648 getr(RLIMIT_MSGQUEUE, &saved_limits);
649 cur_limits = saved_limits;
650 saved_max_msgs = cur_max_msgs = get(max_msgs);
651 saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
653 cur_nice = getpriority(PRIO_PROCESS, 0);
655 shutdown(2, "getpriority()", __LINE__);
657 /* Tell the user our initial state */
658 printf("\nInitial system state:\n");
659 printf("\tUsing queue path:\t\t\t%s\n", queue_path);
660 printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
661 (long) saved_limits.rlim_cur);
662 printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
663 (long) saved_limits.rlim_max);
664 printf("\tMaximum Message Size:\t\t\t%d\n", saved_max_msgsize);
665 printf("\tMaximum Queue Size:\t\t\t%d\n", saved_max_msgs);
666 printf("\tNice value:\t\t\t\t%d\n", cur_nice);
671 printf("Adjusted system state for testing:\n");
672 if (cur_limits.rlim_cur == RLIM_INFINITY) {
673 printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t(unlimited)\n");
674 printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t(unlimited)\n");
676 printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
677 (long) cur_limits.rlim_cur);
678 printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
679 (long) cur_limits.rlim_max);
681 printf("\tMaximum Message Size:\t\t\t%d\n", cur_max_msgsize);
682 printf("\tMaximum Queue Size:\t\t\t%d\n", cur_max_msgs);
683 printf("\tNice value:\t\t\t\t%d\n", cur_nice);
684 printf("\tContinuous mode:\t\t\t(%s)\n", continuous_mode ?
685 (continuous_mode_fake ? "fake mode" : "enabled") :
687 printf("\tCPUs to pin:\t\t\t\t%d", cpus_to_pin[0]);
688 for (cpu = 1; cpu < num_cpus_to_pin; cpu++)
689 printf(",%d", cpus_to_pin[cpu]);
692 sa.sa_sigaction = sig_action_SIGUSR1;
693 sigemptyset(&sa.sa_mask);
694 sigaddset(&sa.sa_mask, SIGHUP);
695 sigaddset(&sa.sa_mask, SIGINT);
696 sigaddset(&sa.sa_mask, SIGQUIT);
697 sigaddset(&sa.sa_mask, SIGTERM);
698 sa.sa_flags = SA_SIGINFO;
699 if (sigaction(SIGUSR1, &sa, NULL) == -1)
700 shutdown(1, "sigaction(SIGUSR1)", __LINE__);
701 sa.sa_sigaction = sig_action;
702 if (sigaction(SIGHUP, &sa, NULL) == -1)
703 shutdown(1, "sigaction(SIGHUP)", __LINE__);
704 if (sigaction(SIGINT, &sa, NULL) == -1)
705 shutdown(1, "sigaction(SIGINT)", __LINE__);
706 if (sigaction(SIGQUIT, &sa, NULL) == -1)
707 shutdown(1, "sigaction(SIGQUIT)", __LINE__);
708 if (sigaction(SIGTERM, &sa, NULL) == -1)
709 shutdown(1, "sigaction(SIGTERM)", __LINE__);
711 if (!continuous_mode_fake) {
712 attr.mq_flags = O_NONBLOCK;
713 attr.mq_maxmsg = cur_max_msgs;
714 attr.mq_msgsize = MSG_SIZE;
717 for (i = 0; i < num_cpus_to_pin; i++) {
718 pthread_attr_t thread_attr;
721 if (continuous_mode_fake)
722 thread_func = &fake_cont_thread;
723 else if (continuous_mode)
724 thread_func = &cont_thread;
726 thread_func = &perf_test_thread;
728 CPU_ZERO_S(cpu_set_size, cpu_set);
729 CPU_SET_S(cpus_to_pin[i], cpu_set_size, cpu_set);
730 pthread_attr_init(&thread_attr);
731 pthread_attr_setaffinity_np(&thread_attr, cpu_set_size,
733 if (pthread_create(&cpu_threads[i], &thread_attr, thread_func,
735 shutdown(1, "pthread_create()", __LINE__);
736 pthread_attr_destroy(&thread_attr);
739 if (!continuous_mode) {
740 pthread_join(cpu_threads[0], &retval);
741 shutdown((long)retval, "perf_test_thread()", __LINE__);