selftests/resctrl: Add callback to start a benchmark
authorSai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Thu, 16 Jan 2020 21:32:37 +0000 (13:32 -0800)
committerShuah Khan <skhan@linuxfoundation.org>
Tue, 11 Feb 2020 01:38:28 +0000 (18:38 -0700)
The callback starts a child process and puts the child pid in created
resctrl group with specified memory bandwidth in schemata. The child
starts running benchmark.

Signed-off-by: Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>
Co-developed-by: Babu Moger <babu.moger@amd.com>
Signed-off-by: Babu Moger <babu.moger@amd.com>
Co-developed-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
tools/testing/selftests/resctrl/resctrl.h
tools/testing/selftests/resctrl/resctrl_val.c

index ba98cd6efc64991d3f7594eda986325da49ffe2a..fb42087c904d65fc1065082928290997e80d0d4d 100644 (file)
@@ -3,6 +3,8 @@
 #ifndef RESCTRL_H
 #define RESCTRL_H
 #include <stdio.h>
+#include <stdarg.h>
+#include <math.h>
 #include <errno.h>
 #include <sched.h>
 #include <stdlib.h>
@@ -19,6 +21,7 @@
 #include <asm/unistd.h>
 #include <linux/perf_event.h>
 
+#define MB                     (1024 * 1024)
 #define RESCTRL_PATH           "/sys/fs/resctrl"
 #define PHYS_ID_PATH           "/sys/devices/system/cpu/cpu"
 
                exit(EXIT_FAILURE);             \
        } while (0)
 
+/*
+ * resctrl_val_param:  resctrl test parameters
+ * @resctrl_val:       Resctrl feature (Eg: mbm, mba.. etc)
+ * @ctrlgrp:           Name of the control monitor group (con_mon grp)
+ * @mongrp:            Name of the monitor group (mon grp)
+ * @cpu_no:            CPU number to which the benchmark would be binded
+ * @span:              Memory bytes accessed in each benchmark iteration
+ * @mum_resctrlfs:     Should the resctrl FS be remounted?
+ * @filename:          Name of file to which the o/p should be written
+ * @bw_report:         Bandwidth report type (reads vs writes)
+ * @setup:             Call back function to setup test environment
+ */
+struct resctrl_val_param {
+       char            *resctrl_val;
+       char            ctrlgrp[64];
+       char            mongrp[64];
+       int             cpu_no;
+       unsigned long   span;
+       int             mum_resctrlfs;
+       char            filename[64];
+       char            *bw_report;
+       int             (*setup)(int num, ...);
+};
+
 pid_t bm_pid, ppid;
 
 int remount_resctrlfs(bool mum_resctrlfs);
 int get_resource_id(int cpu_no, int *resource_id);
+int umount_resctrlfs(void);
 int validate_bw_report_request(char *bw_report);
 bool validate_resctrl_feature_request(char *resctrl_val);
 char *fgrep(FILE *inf, const char *str);
@@ -46,5 +74,6 @@ int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
                    int group_fd, unsigned long flags);
 int run_fill_buf(unsigned long span, int malloc_and_init_memory, int memflush,
                 int op, char *resctrl_va);
+int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param);
 
 #endif /* RESCTRL_H */
index 0ca4c925251630e17e714f212e8edf156abe7632..0d1bd03ec4be786ee37eb2888d00cbedeecb0c4e 100644 (file)
@@ -47,6 +47,8 @@ struct imc_counter_config {
        int fd;
 };
 
+static char mbm_total_path[1024];
+static int imcs;
 static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
 
 void membw_initialize_perf_event_attr(int i, int j)
@@ -115,3 +117,575 @@ void get_event_and_umask(char *cas_count_cfg, int count, bool op)
                }
        }
 }
+
+static int open_perf_event(int i, int cpu_no, int j)
+{
+       imc_counters_config[i][j].fd =
+               perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1,
+                               PERF_FLAG_FD_CLOEXEC);
+
+       if (imc_counters_config[i][j].fd == -1) {
+               fprintf(stderr, "Error opening leader %llx\n",
+                       imc_counters_config[i][j].pe.config);
+
+               return -1;
+       }
+
+       return 0;
+}
+
+/* Get type and config (read and write) of an iMC counter */
+static int read_from_imc_dir(char *imc_dir, int count)
+{
+       char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
+       FILE *fp;
+
+       /* Get type of iMC counter */
+       sprintf(imc_counter_type, "%s%s", imc_dir, "type");
+       fp = fopen(imc_counter_type, "r");
+       if (!fp) {
+               perror("Failed to open imc counter type file");
+
+               return -1;
+       }
+       if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
+               perror("Could not get imc type");
+               fclose(fp);
+
+               return -1;
+       }
+       fclose(fp);
+
+       imc_counters_config[count][WRITE].type =
+                               imc_counters_config[count][READ].type;
+
+       /* Get read config */
+       sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
+       fp = fopen(imc_counter_cfg, "r");
+       if (!fp) {
+               perror("Failed to open imc config file");
+
+               return -1;
+       }
+       if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
+               perror("Could not get imc cas count read");
+               fclose(fp);
+
+               return -1;
+       }
+       fclose(fp);
+
+       get_event_and_umask(cas_count_cfg, count, READ);
+
+       /* Get write config */
+       sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
+       fp = fopen(imc_counter_cfg, "r");
+       if (!fp) {
+               perror("Failed to open imc config file");
+
+               return -1;
+       }
+       if  (fscanf(fp, "%s", cas_count_cfg) <= 0) {
+               perror("Could not get imc cas count write");
+               fclose(fp);
+
+               return -1;
+       }
+       fclose(fp);
+
+       get_event_and_umask(cas_count_cfg, count, WRITE);
+
+       return 0;
+}
+
+/*
+ * A system can have 'n' number of iMC (Integrated Memory Controller)
+ * counters, get that 'n'. For each iMC counter get it's type and config.
+ * Also, each counter has two configs, one for read and the other for write.
+ * A config again has two parts, event and umask.
+ * Enumerate all these details into an array of structures.
+ *
+ * Return: >= 0 on success. < 0 on failure.
+ */
+static int num_of_imcs(void)
+{
+       unsigned int count = 0;
+       char imc_dir[512];
+       struct dirent *ep;
+       int ret;
+       DIR *dp;
+
+       dp = opendir(DYN_PMU_PATH);
+       if (dp) {
+               while ((ep = readdir(dp))) {
+                       if (strstr(ep->d_name, UNCORE_IMC)) {
+                               sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
+                                       ep->d_name);
+                               ret = read_from_imc_dir(imc_dir, count);
+                               if (ret) {
+                                       closedir(dp);
+
+                                       return ret;
+                               }
+                               count++;
+                       }
+               }
+               closedir(dp);
+               if (count == 0) {
+                       perror("Unable find iMC counters!\n");
+
+                       return -1;
+               }
+       } else {
+               perror("Unable to open PMU directory!\n");
+
+               return -1;
+       }
+
+       return count;
+}
+
+static int initialize_mem_bw_imc(void)
+{
+       int imc, j;
+
+       imcs = num_of_imcs();
+       if (imcs <= 0)
+               return imcs;
+
+       /* Initialize perf_event_attr structures for all iMC's */
+       for (imc = 0; imc < imcs; imc++) {
+               for (j = 0; j < 2; j++)
+                       membw_initialize_perf_event_attr(imc, j);
+       }
+
+       return 0;
+}
+
+/*
+ * get_mem_bw_imc:     Memory band width as reported by iMC counters
+ * @cpu_no:            CPU number that the benchmark PID is binded to
+ * @bw_report:         Bandwidth report type (reads, writes)
+ *
+ * Memory B/W utilized by a process on a socket can be calculated using
+ * iMC counters. Perf events are used to read these counters.
+ *
+ * Return: >= 0 on success. < 0 on failure.
+ */
+static float get_mem_bw_imc(int cpu_no, char *bw_report)
+{
+       float reads, writes, of_mul_read, of_mul_write;
+       int imc, j, ret;
+
+       /* Start all iMC counters to log values (both read and write) */
+       reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
+       for (imc = 0; imc < imcs; imc++) {
+               for (j = 0; j < 2; j++) {
+                       ret = open_perf_event(imc, cpu_no, j);
+                       if (ret)
+                               return -1;
+               }
+               for (j = 0; j < 2; j++)
+                       membw_ioctl_perf_event_ioc_reset_enable(imc, j);
+       }
+
+       sleep(1);
+
+       /* Stop counters after a second to get results (both read and write) */
+       for (imc = 0; imc < imcs; imc++) {
+               for (j = 0; j < 2; j++)
+                       membw_ioctl_perf_event_ioc_disable(imc, j);
+       }
+
+       /*
+        * Get results which are stored in struct type imc_counter_config
+        * Take over flow into consideration before calculating total b/w
+        */
+       for (imc = 0; imc < imcs; imc++) {
+               struct imc_counter_config *r =
+                       &imc_counters_config[imc][READ];
+               struct imc_counter_config *w =
+                       &imc_counters_config[imc][WRITE];
+
+               if (read(r->fd, &r->return_value,
+                        sizeof(struct membw_read_format)) == -1) {
+                       perror("Couldn't get read b/w through iMC");
+
+                       return -1;
+               }
+
+               if (read(w->fd, &w->return_value,
+                        sizeof(struct membw_read_format)) == -1) {
+                       perror("Couldn't get write bw through iMC");
+
+                       return -1;
+               }
+
+               __u64 r_time_enabled = r->return_value.time_enabled;
+               __u64 r_time_running = r->return_value.time_running;
+
+               if (r_time_enabled != r_time_running)
+                       of_mul_read = (float)r_time_enabled /
+                                       (float)r_time_running;
+
+               __u64 w_time_enabled = w->return_value.time_enabled;
+               __u64 w_time_running = w->return_value.time_running;
+
+               if (w_time_enabled != w_time_running)
+                       of_mul_write = (float)w_time_enabled /
+                                       (float)w_time_running;
+               reads += r->return_value.value * of_mul_read * SCALE;
+               writes += w->return_value.value * of_mul_write * SCALE;
+       }
+
+       for (imc = 0; imc < imcs; imc++) {
+               close(imc_counters_config[imc][READ].fd);
+               close(imc_counters_config[imc][WRITE].fd);
+       }
+
+       if (strcmp(bw_report, "reads") == 0)
+               return reads;
+
+       if (strcmp(bw_report, "writes") == 0)
+               return writes;
+
+       return (reads + writes);
+}
+
+void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
+{
+       if (ctrlgrp && mongrp)
+               sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
+                       RESCTRL_PATH, ctrlgrp, mongrp, resource_id);
+       else if (!ctrlgrp && mongrp)
+               sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+                       mongrp, resource_id);
+       else if (ctrlgrp && !mongrp)
+               sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+                       ctrlgrp, resource_id);
+       else if (!ctrlgrp && !mongrp)
+               sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+                       resource_id);
+}
+
+/*
+ * initialize_mem_bw_resctrl:  Appropriately populate "mbm_total_path"
+ * @ctrlgrp:                   Name of the control monitor group (con_mon grp)
+ * @mongrp:                    Name of the monitor group (mon grp)
+ * @cpu_no:                    CPU number that the benchmark PID is binded to
+ * @resctrl_val:               Resctrl feature (Eg: mbm, mba.. etc)
+ */
+static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
+                                     int cpu_no, char *resctrl_val)
+{
+       int resource_id;
+
+       if (get_resource_id(cpu_no, &resource_id) < 0) {
+               perror("Could not get resource_id");
+               return;
+       }
+
+       if (strcmp(resctrl_val, "mbm") == 0)
+               set_mbm_path(ctrlgrp, mongrp, resource_id);
+
+       if ((strcmp(resctrl_val, "mba") == 0)) {
+               if (ctrlgrp)
+                       sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
+                               RESCTRL_PATH, ctrlgrp, resource_id);
+               else
+                       sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
+                               RESCTRL_PATH, resource_id);
+       }
+}
+
+/*
+ * Get MBM Local bytes as reported by resctrl FS
+ * For MBM,
+ * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp
+ * 2. If only con_mon grp is given, then read from con_mon grp
+ * 3. If both are not given, then read from root con_mon grp
+ * For MBA,
+ * 1. If con_mon grp is given, then read from it
+ * 2. If con_mon grp is not given, then read from root con_mon grp
+ */
+static unsigned long get_mem_bw_resctrl(void)
+{
+       unsigned long mbm_total = 0;
+       FILE *fp;
+
+       fp = fopen(mbm_total_path, "r");
+       if (!fp) {
+               perror("Failed to open total bw file");
+
+               return -1;
+       }
+       if (fscanf(fp, "%lu", &mbm_total) <= 0) {
+               perror("Could not get mbm local bytes");
+               fclose(fp);
+
+               return -1;
+       }
+       fclose(fp);
+
+       return mbm_total;
+}
+
+pid_t bm_pid, ppid;
+
+static void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
+{
+       kill(bm_pid, SIGKILL);
+       printf("Ending\n\n");
+
+       exit(EXIT_SUCCESS);
+}
+
+/*
+ * print_results_bw:   the memory bandwidth results are stored in a file
+ * @filename:          file that stores the results
+ * @bm_pid:            child pid that runs benchmark
+ * @bw_imc:            perf imc counter value
+ * @bw_resc:           memory bandwidth value
+ *
+ * Return:             0 on success. non-zero on failure.
+ */
+static int print_results_bw(char *filename,  int bm_pid, float bw_imc,
+                           unsigned long bw_resc)
+{
+       unsigned long diff = fabs(bw_imc - bw_resc);
+       FILE *fp;
+
+       if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
+               printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc);
+               printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);
+       } else {
+               fp = fopen(filename, "a");
+               if (!fp) {
+                       perror("Cannot open results file");
+
+                       return errno;
+               }
+               if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
+                           bm_pid, bw_imc, bw_resc, diff) <= 0) {
+                       fclose(fp);
+                       perror("Could not log results.");
+
+                       return errno;
+               }
+               fclose(fp);
+       }
+
+       return 0;
+}
+
+static int
+measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
+{
+       unsigned long bw_imc, bw_resc, bw_resc_end;
+       int ret;
+
+       /*
+        * Measure memory bandwidth from resctrl and from
+        * another source which is perf imc value or could
+        * be something else if perf imc event is not available.
+        * Compare the two values to validate resctrl value.
+        * It takes 1sec to measure the data.
+        */
+       bw_imc = get_mem_bw_imc(param->cpu_no, param->bw_report);
+       if (bw_imc <= 0)
+               return bw_imc;
+
+       bw_resc_end = get_mem_bw_resctrl();
+       if (bw_resc_end <= 0)
+               return bw_resc_end;
+
+       bw_resc = (bw_resc_end - *bw_resc_start) / MB;
+       ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
+       if (ret)
+               return ret;
+
+       *bw_resc_start = bw_resc_end;
+
+       return 0;
+}
+
+/*
+ * resctrl_val:        execute benchmark and measure memory bandwidth on
+ *                     the benchmark
+ * @benchmark_cmd:     benchmark command and its arguments
+ * @param:             parameters passed to resctrl_val()
+ *
+ * Return:             0 on success. non-zero on failure.
+ */
+int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
+{
+       char *resctrl_val = param->resctrl_val;
+       unsigned long bw_resc_start = 0;
+       struct sigaction sigact;
+       int ret = 0, pipefd[2];
+       char pipe_message = 0;
+       union sigval value;
+
+       if (strcmp(param->filename, "") == 0)
+               sprintf(param->filename, "stdio");
+
+       if ((strcmp(resctrl_val, "mba")) == 0 ||
+           (strcmp(resctrl_val, "mbm")) == 0) {
+               ret = validate_bw_report_request(param->bw_report);
+               if (ret)
+                       return ret;
+       }
+
+       ret = remount_resctrlfs(param->mum_resctrlfs);
+       if (ret)
+               return ret;
+
+       /*
+        * If benchmark wasn't successfully started by child, then child should
+        * kill parent, so save parent's pid
+        */
+       ppid = getpid();
+
+       if (pipe(pipefd)) {
+               perror("# Unable to create pipe");
+
+               return -1;
+       }
+
+       /*
+        * Fork to start benchmark, save child's pid so that it can be killed
+        * when needed
+        */
+       bm_pid = fork();
+       if (bm_pid == -1) {
+               perror("# Unable to fork");
+
+               return -1;
+       }
+
+       if (bm_pid == 0) {
+               /*
+                * Mask all signals except SIGUSR1, parent uses SIGUSR1 to
+                * start benchmark
+                */
+               sigfillset(&sigact.sa_mask);
+               sigdelset(&sigact.sa_mask, SIGUSR1);
+
+               sigact.sa_sigaction = run_benchmark;
+               sigact.sa_flags = SA_SIGINFO;
+
+               /* Register for "SIGUSR1" signal from parent */
+               if (sigaction(SIGUSR1, &sigact, NULL))
+                       PARENT_EXIT("Can't register child for signal");
+
+               /* Tell parent that child is ready */
+               close(pipefd[0]);
+               pipe_message = 1;
+               if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
+                   sizeof(pipe_message)) {
+                       perror("# failed signaling parent process");
+                       close(pipefd[1]);
+                       return -1;
+               }
+               close(pipefd[1]);
+
+               /* Suspend child until delivery of "SIGUSR1" from parent */
+               sigsuspend(&sigact.sa_mask);
+
+               PARENT_EXIT("Child is done");
+       }
+
+       printf("# benchmark PID: %d\n", bm_pid);
+
+       /*
+        * Register CTRL-C handler for parent, as it has to kill benchmark
+        * before exiting
+        */
+       sigact.sa_sigaction = ctrlc_handler;
+       sigemptyset(&sigact.sa_mask);
+       sigact.sa_flags = SA_SIGINFO;
+       if (sigaction(SIGINT, &sigact, NULL) ||
+           sigaction(SIGHUP, &sigact, NULL)) {
+               perror("# sigaction");
+               ret = errno;
+               goto out;
+       }
+
+       value.sival_ptr = benchmark_cmd;
+
+       /* Taskset benchmark to specified cpu */
+       ret = taskset_benchmark(bm_pid, param->cpu_no);
+       if (ret)
+               goto out;
+
+       /* Write benchmark to specified control&monitoring grp in resctrl FS */
+       ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
+                                     resctrl_val);
+       if (ret)
+               goto out;
+
+       if ((strcmp(resctrl_val, "mbm") == 0) ||
+           (strcmp(resctrl_val, "mba") == 0)) {
+               ret = initialize_mem_bw_imc();
+               if (ret)
+                       goto out;
+
+               initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
+                                         param->cpu_no, resctrl_val);
+       }
+
+       /* Parent waits for child to be ready. */
+       close(pipefd[1]);
+       while (pipe_message != 1) {
+               if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
+                   sizeof(pipe_message)) {
+                       perror("# failed reading message from child process");
+                       close(pipefd[0]);
+                       goto out;
+               }
+       }
+       close(pipefd[0]);
+
+       /* Signal child to start benchmark */
+       if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
+               perror("# sigqueue SIGUSR1 to child");
+               ret = errno;
+               goto out;
+       }
+
+       /* Give benchmark enough time to fully run */
+       sleep(1);
+
+       /* Test runs until the callback setup() tells the test to stop. */
+       while (1) {
+               if (strcmp(resctrl_val, "mbm") == 0) {
+                       ret = param->setup(1, param);
+                       if (ret) {
+                               ret = 0;
+                               break;
+                       }
+
+                       ret = measure_vals(param, &bw_resc_start);
+                       if (ret)
+                               break;
+               } else if ((strcmp(resctrl_val, "mba") == 0)) {
+                       ret = param->setup(1, param);
+                       if (ret) {
+                               ret = 0;
+                               break;
+                       }
+
+                       ret = measure_vals(param, &bw_resc_start);
+                       if (ret)
+                               break;
+               } else {
+                       break;
+               }
+       }
+
+out:
+       kill(bm_pid, SIGKILL);
+       umount_resctrlfs();
+
+       return ret;
+}