1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
7 #include <linux/compiler.h>
15 #include <sys/sysinfo.h>
17 #include <bpf/libbpf.h>
35 NUM_STATS_CNT = FILE_NAME - VERDICT,
38 /* In comparison mode each stat can specify up to four different values:
41 * - absolute diff value;
42 * - relative (percentage) diff value.
44 * When specifying stat specs in comparison mode, user can use one of the
45 * following variant suffixes to specify which exact variant should be used for
46 * ordering or filtering:
47 * - `_a` for A side value;
48 * - `_b` for B side value;
49 * - `_diff` for absolute diff value;
50 * - `_pct` for relative (percentage) diff value.
52 * If no variant suffix is provided, then `_b` (control data) is assumed.
54 * As an example, let's say instructions stat has the following output:
56 * Insns (A) Insns (B) Insns (DIFF)
57 * --------- --------- --------------
58 * 21547 20920 -627 (-2.91%)
61 * - 21547 is A side value (insns_a);
62 * - 20920 is B side value (insns_b);
63 * - -627 is absolute diff value (insns_diff);
64 * - -2.91% is relative diff value (insns_pct).
66 * For verdict there is no verdict_pct variant.
67 * For file and program name, _a and _b variants are equivalent and there are
68 * no _diff or _pct variants.
81 long stats[NUM_STATS_CNT];
84 /* joined comparison mode stats */
85 struct verif_stats_join {
89 const struct verif_stats *stats_a;
90 const struct verif_stats *stats_b;
95 enum stat_id ids[ALL_STATS_CNT];
96 enum stat_variant variants[ALL_STATS_CNT];
97 bool asc[ALL_STATS_CNT];
98 int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
103 RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
114 OP_NEQ, /* != or <> */
122 enum filter_kind kind;
128 enum operator_kind op;
130 enum stat_variant stat_var;
142 bool comparison_mode;
145 struct verif_stats *prog_stats;
148 /* baseline_stats is allocated and used only in comparison mode */
149 struct verif_stats *baseline_stats;
150 int baseline_stat_cnt;
152 struct verif_stats_join *join_stats;
155 struct stat_specs output_spec;
156 struct stat_specs sort_spec;
158 struct filter *allow_filters;
159 struct filter *deny_filters;
160 int allow_filter_cnt;
169 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
173 if (level == LIBBPF_DEBUG && !env.debug)
175 return vfprintf(stderr, format, args);
178 const char *argp_program_version = "veristat";
179 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
180 const char argp_program_doc[] =
181 "veristat BPF verifier stats collection and comparison tool.\n"
183 "USAGE: veristat <obj-file> [<obj-file>...]\n"
184 " OR: veristat -C <baseline.csv> <comparison.csv>\n";
186 static const struct argp_option opts[] = {
187 { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
188 { "verbose", 'v', NULL, 0, "Verbose mode" },
189 { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
190 { "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
191 { "quiet", 'q', NULL, 0, "Quiet mode" },
192 { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
193 { "sort", 's', "SPEC", 0, "Specify sort order" },
194 { "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
195 { "compare", 'C', NULL, 0, "Comparison mode" },
196 { "replay", 'R', NULL, 0, "Replay mode" },
197 { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
201 static int parse_stats(const char *stats_str, struct stat_specs *specs);
202 static int append_filter(struct filter **filters, int *cnt, const char *str);
203 static int append_filter_file(const char *path);
205 static error_t parse_arg(int key, char *arg, struct argp_state *state)
212 argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
225 err = parse_stats(arg, &env.output_spec);
230 err = parse_stats(arg, &env.sort_spec);
235 if (strcmp(arg, "table") == 0) {
236 env.out_fmt = RESFMT_TABLE;
237 } else if (strcmp(arg, "csv") == 0) {
238 env.out_fmt = RESFMT_CSV;
240 fprintf(stderr, "Unrecognized output format '%s'\n", arg);
246 env.log_level = strtol(arg, NULL, 10);
248 fprintf(stderr, "invalid log level: %s\n", arg);
253 env.comparison_mode = true;
256 env.replay_mode = true;
260 err = append_filter_file(arg + 1);
261 else if (arg[0] == '!')
262 err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
264 err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
266 fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
271 tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
275 env.filenames[env.filename_cnt] = strdup(arg);
276 if (!env.filenames[env.filename_cnt])
281 return ARGP_ERR_UNKNOWN;
286 static const struct argp argp = {
289 .doc = argp_program_doc,
293 /* Adapted from perf/util/string.c */
294 static bool glob_matches(const char *str, const char *pat)
296 while (*str && *pat && *pat != '*') {
302 /* Check wild card */
306 if (!*pat) /* Tail wild card matches all */
309 if (glob_matches(str++, pat))
312 return !*str && !*pat;
315 static bool is_bpf_obj_file(const char *path) {
317 int fd, err = -EINVAL;
320 fd = open(path, O_RDONLY | O_CLOEXEC);
322 return true; /* we'll fail later and propagate error */
324 /* ensure libelf is initialized */
325 (void)elf_version(EV_CURRENT);
327 elf = elf_begin(fd, ELF_C_READ, NULL);
331 if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
334 ehdr = elf64_getehdr(elf);
335 /* Old LLVM set e_machine to EM_NONE */
336 if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
347 static bool should_process_file_prog(const char *filename, const char *prog_name)
350 int i, allow_cnt = 0;
352 for (i = 0; i < env.deny_filter_cnt; i++) {
353 f = &env.deny_filters[i];
354 if (f->kind != FILTER_NAME)
357 if (f->any_glob && glob_matches(filename, f->any_glob))
359 if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
361 if (f->file_glob && glob_matches(filename, f->file_glob))
363 if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
367 for (i = 0; i < env.allow_filter_cnt; i++) {
368 f = &env.allow_filters[i];
369 if (f->kind != FILTER_NAME)
374 if (glob_matches(filename, f->any_glob))
376 /* If we don't know program name yet, any_glob filter
377 * has to assume that current BPF object file might be
378 * relevant; we'll check again later on after opening
379 * BPF object file, at which point program name will
382 if (!prog_name || glob_matches(prog_name, f->any_glob))
385 if (f->file_glob && !glob_matches(filename, f->file_glob))
387 if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
393 /* if there are no file/prog name allow filters, allow all progs,
394 * unless they are denied earlier explicitly
396 return allow_cnt == 0;
400 enum operator_kind op_kind;
403 /* Order of these definitions matters to avoid situations like '<'
404 * matching part of what is actually a '<>' operator. That is,
405 * substrings should go last.
417 static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var);
419 static int append_filter(struct filter **filters, int *cnt, const char *str)
426 tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
431 f = &(*filters)[*cnt];
432 memset(f, 0, sizeof(*f));
434 /* First, let's check if it's a stats filter of the following form:
435 * <stat><op><value>, where:
436 * - <stat> is one of supported numerical stats (verdict is also
437 * considered numerical, failure == 0, success == 1);
438 * - <op> is comparison operator (see `operators` definitions);
439 * - <value> is an integer (or failure/success, or false/true as
440 * special aliases for 0 and 1, respectively).
441 * If the form doesn't match what user provided, we assume file/prog
444 for (i = 0; i < ARRAY_SIZE(operators); i++) {
445 enum stat_variant var;
448 const char *end = str;
451 op_str = operators[i].op_str;
452 p = strstr(str, op_str);
456 if (!parse_stat_id_var(str, p - str, &id, &var)) {
457 fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
460 if (id >= FILE_NAME) {
461 fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
467 if (strcasecmp(p, "true") == 0 ||
468 strcasecmp(p, "t") == 0 ||
469 strcasecmp(p, "success") == 0 ||
470 strcasecmp(p, "succ") == 0 ||
471 strcasecmp(p, "s") == 0 ||
472 strcasecmp(p, "match") == 0 ||
473 strcasecmp(p, "m") == 0) {
475 } else if (strcasecmp(p, "false") == 0 ||
476 strcasecmp(p, "f") == 0 ||
477 strcasecmp(p, "failure") == 0 ||
478 strcasecmp(p, "fail") == 0 ||
479 strcasecmp(p, "mismatch") == 0 ||
480 strcasecmp(p, "mis") == 0) {
484 val = strtol(p, (char **)&end, 10);
485 if (errno || end == p || *end != '\0' ) {
486 fprintf(stderr, "Invalid integer value in '%s'!\n", str);
491 f->kind = FILTER_STAT;
494 f->op = operators[i].op_kind;
501 /* File/prog filter can be specified either as '<glob>' or
502 * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
503 * both file and program names. This seems to be way more useful in
504 * practice. If user needs full control, they can use '/<prog-glob>'
505 * form to glob just program name, or '<file-glob>/' to glob only file
506 * name. But usually common <glob> seems to be the most useful and
509 f->kind = FILTER_NAME;
510 p = strchr(str, '/');
512 f->any_glob = strdup(str);
517 /* non-empty file glob */
518 f->file_glob = strndup(str, p - str);
522 if (strlen(p + 1) > 0) {
523 /* non-empty prog glob */
524 f->prog_glob = strdup(p + 1);
537 static int append_filter_file(const char *path)
543 f = fopen(path, "r");
546 fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
550 while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
551 /* lines starting with # are comments, skip them */
552 if (buf[0] == '\0' || buf[0] == '#')
554 /* lines starting with ! are negative match filters */
556 err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
558 err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
568 static const struct stat_specs default_output_spec = {
571 FILE_NAME, PROG_NAME, VERDICT, DURATION,
572 TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
576 static const struct stat_specs default_csv_output_spec = {
579 FILE_NAME, PROG_NAME, VERDICT, DURATION,
580 TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
581 MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
585 static const struct stat_specs default_sort_spec = {
588 FILE_NAME, PROG_NAME,
590 .asc = { true, true, },
593 /* sorting for comparison mode to join two data sets */
594 static const struct stat_specs join_sort_spec = {
597 FILE_NAME, PROG_NAME,
599 .asc = { true, true, },
602 static struct stat_def {
604 const char *names[4];
608 [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
609 [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
610 [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
611 [DURATION] = { "Duration (us)", {"duration", "dur"}, },
612 [TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
613 [TOTAL_STATES] = { "States", {"total_states", "states"}, },
614 [PEAK_STATES] = { "Peak states", {"peak_states"}, },
615 [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
616 [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
619 static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var)
621 static const char *var_sfxs[] = {
624 [VARIANT_DIFF] = "_diff",
625 [VARIANT_PCT] = "_pct",
629 for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
630 struct stat_def *def = &stat_defs[i];
631 size_t alias_len, sfx_len;
634 for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
635 alias = def->names[j];
639 alias_len = strlen(alias);
640 if (strncmp(name, alias, alias_len) != 0)
643 if (alias_len == len) {
644 /* If no variant suffix is specified, we
645 * assume control group (just in case we are
646 * in comparison mode). Variant is ignored in
647 * non-comparison mode.
654 for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
655 sfx_len = strlen(var_sfxs[k]);
656 if (alias_len + sfx_len != len)
659 if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
660 *var = (enum stat_variant)k;
671 static bool is_asc_sym(char c)
676 static bool is_desc_sym(char c)
678 return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
681 static int parse_stat(const char *stat_name, struct stat_specs *specs)
684 bool has_order = false, is_asc = false;
685 size_t len = strlen(stat_name);
686 enum stat_variant var;
688 if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
689 fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
693 if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
695 is_asc = is_asc_sym(stat_name[len - 1]);
699 if (!parse_stat_id_var(stat_name, len, &id, &var)) {
700 fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
704 specs->ids[specs->spec_cnt] = id;
705 specs->variants[specs->spec_cnt] = var;
706 specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
712 static int parse_stats(const char *stats_str, struct stat_specs *specs)
714 char *input, *state = NULL, *next;
717 input = strdup(stats_str);
721 while ((next = strtok_r(state ? NULL : input, ",", &state))) {
722 err = parse_stat(next, specs);
730 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
737 for (i = 0; i < stat_cnt; i++) {
738 free(stats[i].file_name);
739 free(stats[i].prog_name);
744 static char verif_log_buf[64 * 1024];
746 #define MAX_PARSED_LOG_LINES 100
748 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
753 buf[buf_sz - 1] = '\0';
755 for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
756 /* find previous endline or otherwise take the start of log buf */
757 for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
759 /* next time start from end of previous line (or pos goes to <0) */
761 /* if we found endline, point right after endline symbol;
762 * otherwise, stay at the beginning of log buf
767 if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
769 if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
770 &s->stats[TOTAL_INSNS],
771 &s->stats[MAX_STATES_PER_INSN],
772 &s->stats[TOTAL_STATES],
773 &s->stats[PEAK_STATES],
774 &s->stats[MARK_READ_MAX_LEN]))
781 static void fixup_obj(struct bpf_object *obj)
785 bpf_object__for_each_map(map, obj) {
786 /* disable pinning */
787 bpf_map__set_pin_path(map, NULL);
789 /* fix up map size, if necessary */
790 switch (bpf_map__type(map)) {
791 case BPF_MAP_TYPE_SK_STORAGE:
792 case BPF_MAP_TYPE_TASK_STORAGE:
793 case BPF_MAP_TYPE_INODE_STORAGE:
794 case BPF_MAP_TYPE_CGROUP_STORAGE:
797 if (bpf_map__max_entries(map) == 0)
798 bpf_map__set_max_entries(map, 1);
803 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
805 const char *prog_name = bpf_program__name(prog);
806 size_t buf_sz = sizeof(verif_log_buf);
807 char *buf = verif_log_buf;
808 struct verif_stats *stats;
812 if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) {
817 tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
820 env.prog_stats = tmp;
821 stats = &env.prog_stats[env.prog_stat_cnt++];
822 memset(stats, 0, sizeof(*stats));
825 buf_sz = 16 * 1024 * 1024;
826 buf = malloc(buf_sz);
829 bpf_program__set_log_buf(prog, buf, buf_sz);
830 bpf_program__set_log_level(prog, env.log_level | 4); /* stats + log */
832 bpf_program__set_log_buf(prog, buf, buf_sz);
833 bpf_program__set_log_level(prog, 4); /* only verifier stats */
835 verif_log_buf[0] = '\0';
837 /* increase chances of successful BPF object loading */
840 err = bpf_object__load(obj);
841 env.progs_processed++;
843 stats->file_name = strdup(basename(filename));
844 stats->prog_name = strdup(bpf_program__name(prog));
845 stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
846 parse_verif_log(buf, buf_sz, stats);
849 printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
850 filename, prog_name, stats->stats[DURATION],
851 err ? "failure" : "success", buf);
854 if (verif_log_buf != buf)
860 static int process_obj(const char *filename)
862 struct bpf_object *obj = NULL, *tobj;
863 struct bpf_program *prog, *tprog, *lprog;
864 libbpf_print_fn_t old_libbpf_print_fn;
865 LIBBPF_OPTS(bpf_object_open_opts, opts);
866 int err = 0, prog_cnt = 0;
868 if (!should_process_file_prog(basename(filename), NULL)) {
870 printf("Skipping '%s' due to filters...\n", filename);
874 if (!is_bpf_obj_file(filename)) {
876 printf("Skipping '%s' as it's not a BPF object file...\n", filename);
881 if (!env.quiet && env.out_fmt == RESFMT_TABLE)
882 printf("Processing '%s'...\n", basename(filename));
884 old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
885 obj = bpf_object__open_file(filename, &opts);
887 /* if libbpf can't open BPF object file, it could be because
888 * that BPF object file is incomplete and has to be statically
889 * linked into a final BPF object file; instead of bailing
890 * out, report it to stderr, mark it as skipped, and
893 fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
899 env.files_processed++;
901 bpf_object__for_each_program(prog, obj) {
906 prog = bpf_object__next_program(obj, NULL);
907 bpf_program__set_autoload(prog, true);
908 process_prog(filename, obj, prog);
912 bpf_object__for_each_program(prog, obj) {
913 const char *prog_name = bpf_program__name(prog);
915 tobj = bpf_object__open_file(filename, &opts);
918 fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
922 bpf_object__for_each_program(tprog, tobj) {
923 const char *tprog_name = bpf_program__name(tprog);
925 if (strcmp(prog_name, tprog_name) == 0) {
926 bpf_program__set_autoload(tprog, true);
929 bpf_program__set_autoload(tprog, false);
933 process_prog(filename, tobj, lprog);
934 bpf_object__close(tobj);
938 bpf_object__close(obj);
939 libbpf_set_print(old_libbpf_print_fn);
943 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
944 enum stat_id id, bool asc)
950 cmp = strcmp(s1->file_name, s2->file_name);
953 cmp = strcmp(s1->prog_name, s2->prog_name);
960 case MAX_STATES_PER_INSN:
961 case MARK_READ_MAX_LEN: {
962 long v1 = s1->stats[id];
963 long v2 = s2->stats[id];
966 cmp = v1 < v2 ? -1 : 1;
970 fprintf(stderr, "Unrecognized stat #%d\n", id);
974 return asc ? cmp : -cmp;
977 static int cmp_prog_stats(const void *v1, const void *v2)
979 const struct verif_stats *s1 = v1, *s2 = v2;
982 for (i = 0; i < env.sort_spec.spec_cnt; i++) {
983 cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
988 /* always disambiguate with file+prog, which are unique */
989 cmp = strcmp(s1->file_name, s2->file_name);
992 return strcmp(s1->prog_name, s2->prog_name);
995 static void fetch_join_stat_value(const struct verif_stats_join *s,
996 enum stat_id id, enum stat_variant var,
997 const char **str_val,
1002 if (id == FILE_NAME) {
1003 *str_val = s->file_name;
1006 if (id == PROG_NAME) {
1007 *str_val = s->prog_name;
1011 v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1012 v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1017 *num_val = -DBL_MAX;
1019 *num_val = s->stats_a->stats[id];
1023 *num_val = -DBL_MAX;
1025 *num_val = s->stats_b->stats[id];
1028 if (!s->stats_a || !s->stats_b)
1029 *num_val = -DBL_MAX;
1030 else if (id == VERDICT)
1031 *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1033 *num_val = (double)(v2 - v1);
1036 if (!s->stats_a || !s->stats_b) {
1037 *num_val = -DBL_MAX;
1038 } else if (v1 == 0) {
1042 *num_val = v2 < v1 ? -100.0 : 100.0;
1044 *num_val = (v2 - v1) * 100.0 / v1;
1050 static int cmp_join_stat(const struct verif_stats_join *s1,
1051 const struct verif_stats_join *s2,
1052 enum stat_id id, enum stat_variant var, bool asc)
1054 const char *str1 = NULL, *str2 = NULL;
1058 fetch_join_stat_value(s1, id, var, &str1, &v1);
1059 fetch_join_stat_value(s2, id, var, &str2, &v2);
1062 cmp = strcmp(str1, str2);
1064 cmp = v1 < v2 ? -1 : 1;
1066 return asc ? cmp : -cmp;
1069 static int cmp_join_stats(const void *v1, const void *v2)
1071 const struct verif_stats_join *s1 = v1, *s2 = v2;
1074 for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1075 cmp = cmp_join_stat(s1, s2,
1076 env.sort_spec.ids[i],
1077 env.sort_spec.variants[i],
1078 env.sort_spec.asc[i]);
1083 /* always disambiguate with file+prog, which are unique */
1084 cmp = strcmp(s1->file_name, s2->file_name);
1087 return strcmp(s1->prog_name, s2->prog_name);
/* Character repeated to draw the underline row beneath table column headers. */
1090 #define HEADER_CHAR '-'
/* Separator string printed between adjacent columns of table output. */
1091 #define COLUMN_SEP " "
1093 static void output_header_underlines(void)
1097 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1098 len = env.output_spec.lens[i];
1100 printf("%s", i == 0 ? "" : COLUMN_SEP);
1101 for (j = 0; j < len; j++)
1102 printf("%c", HEADER_CHAR);
1107 static void output_headers(enum resfmt fmt)
1109 const char *fmt_str;
1112 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1113 int id = env.output_spec.ids[i];
1114 int *max_len = &env.output_spec.lens[i];
1117 case RESFMT_TABLE_CALCLEN:
1118 len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1123 fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1124 printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header);
1125 if (i == env.output_spec.spec_cnt - 1)
1129 printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1130 if (i == env.output_spec.spec_cnt - 1)
1136 if (fmt == RESFMT_TABLE)
1137 output_header_underlines();
1140 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1141 const char **str, long *val)
1145 *str = s ? s->file_name : "N/A";
1148 *str = s ? s->prog_name : "N/A";
1154 *str = s->stats[VERDICT] ? "success" : "failure";
1160 case MAX_STATES_PER_INSN:
1161 case MARK_READ_MAX_LEN:
1162 *val = s ? s->stats[id] : 0;
1165 fprintf(stderr, "Unrecognized stat #%d\n", id);
1170 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1174 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1175 int id = env.output_spec.ids[i];
1176 int *max_len = &env.output_spec.lens[i], len;
1177 const char *str = NULL;
1180 prepare_value(s, id, &str, &val);
1183 case RESFMT_TABLE_CALCLEN:
1185 len = snprintf(NULL, 0, "%s", str);
1187 len = snprintf(NULL, 0, "%ld", val);
1193 printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1195 printf("%s%*ld", i == 0 ? "" : COLUMN_SEP, *max_len, val);
1196 if (i == env.output_spec.spec_cnt - 1)
1201 printf("%s%s", i == 0 ? "" : ",", str);
1203 printf("%s%ld", i == 0 ? "" : ",", val);
1204 if (i == env.output_spec.spec_cnt - 1)
1210 if (last && fmt == RESFMT_TABLE) {
1211 output_header_underlines();
/* Summary line: arguments must follow the order of the format string,
 * i.e. processed files, processed programs, then skipped files, skipped
 * programs. Passing them grouped by files/programs swaps the two middle
 * numbers in the printed summary.
 */
1212 printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1213 env.files_processed, env.progs_processed, env.files_skipped, env.progs_skipped);
1217 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1221 st->file_name = strdup(str);
1226 st->prog_name = strdup(str);
1231 if (strcmp(str, "success") == 0) {
1232 st->stats[VERDICT] = true;
1233 } else if (strcmp(str, "failure") == 0) {
1234 st->stats[VERDICT] = false;
1236 fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1244 case MAX_STATES_PER_INSN:
1245 case MARK_READ_MAX_LEN: {
1249 if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1251 fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1255 st->stats[id] = val;
1259 fprintf(stderr, "Unrecognized stat #%d\n", id);
1265 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1266 struct verif_stats **statsp, int *stat_cntp)
1273 f = fopen(filename, "r");
1276 fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1282 while (fgets(line, sizeof(line), f)) {
1283 char *input = line, *state = NULL, *next;
1284 struct verif_stats *st = NULL;
1290 tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1297 st = &(*statsp)[*stat_cntp];
1298 memset(st, 0, sizeof(*st));
1303 while ((next = strtok_r(state ? NULL : input, ",\n", &state))) {
1305 /* for the first line, set up spec stats */
1306 err = parse_stat(next, specs);
1312 /* for all other lines, parse values based on spec */
1313 if (col >= specs->spec_cnt) {
1314 fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1315 col, *stat_cntp, filename);
1319 err = parse_stat_value(next, specs->ids[col], st);
1330 if (col < specs->spec_cnt) {
1331 fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1332 *stat_cntp, filename);
1337 if (!st->file_name || !st->prog_name) {
1338 fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1339 *stat_cntp, filename);
1344 /* in comparison mode we can only check filters after we
1345 * parsed entire line; if row should be ignored we pretend we
1348 if (!should_process_file_prog(st->file_name, st->prog_name)) {
1349 free(st->file_name);
1350 free(st->prog_name);
1357 fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1365 /* empty/zero stats for mismatched rows */
1366 static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1368 static bool is_key_stat(enum stat_id id)
1370 return id == FILE_NAME || id == PROG_NAME;
1373 static void output_comp_header_underlines(void)
1377 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1378 int id = env.output_spec.ids[i];
1379 int max_j = is_key_stat(id) ? 1 : 3;
1381 for (j = 0; j < max_j; j++) {
1382 int len = env.output_spec.lens[3 * i + j];
1384 printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1386 for (k = 0; k < len; k++)
1387 printf("%c", HEADER_CHAR);
1393 static void output_comp_headers(enum resfmt fmt)
1395 static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1396 static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1399 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1400 int id = env.output_spec.ids[i];
1401 /* key stats don't have A/B/DIFF columns, they are common for both data sets */
1402 int max_j = is_key_stat(id) ? 1 : 3;
1404 for (j = 0; j < max_j; j++) {
1405 int *max_len = &env.output_spec.lens[3 * i + j];
1406 bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1410 case RESFMT_TABLE_CALCLEN:
1411 sfx = is_key_stat(id) ? "" : table_sfxs[j];
1412 len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1417 sfx = is_key_stat(id) ? "" : table_sfxs[j];
1418 printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1419 *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1424 sfx = is_key_stat(id) ? "" : name_sfxs[j];
1425 printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1433 if (fmt == RESFMT_TABLE)
1434 output_comp_header_underlines();
1437 static void output_comp_stats(const struct verif_stats_join *join_stats,
1438 enum resfmt fmt, bool last)
1440 const struct verif_stats *base = join_stats->stats_a;
1441 const struct verif_stats *comp = join_stats->stats_b;
1442 char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1445 for (i = 0; i < env.output_spec.spec_cnt; i++) {
1446 int id = env.output_spec.ids[i], len;
1447 int *max_len_base = &env.output_spec.lens[3 * i + 0];
1448 int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1449 int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1450 const char *base_str = NULL, *comp_str = NULL;
1451 long base_val = 0, comp_val = 0, diff_val = 0;
1453 prepare_value(base, id, &base_str, &base_val);
1454 prepare_value(comp, id, &comp_str, &comp_val);
1456 /* normalize all the outputs to be in string buffers for simplicity */
1457 if (is_key_stat(id)) {
1458 /* key stats (file and program name) are always strings */
1460 snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1462 snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1463 } else if (base_str) {
1464 snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1465 snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1467 snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1468 else if (strcmp(base_str, comp_str) == 0)
1469 snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1471 snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1476 snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1478 snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1480 snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1482 snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1484 diff_val = comp_val - base_val;
1485 if (!base || !comp) {
1486 snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1488 if (base_val == 0) {
1489 if (comp_val == base_val)
1490 p = 0.0; /* avoid +0 (+100%) case */
1492 p = comp_val < base_val ? -100.0 : 100.0;
1494 p = diff_val * 100.0 / base_val;
1496 snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1501 case RESFMT_TABLE_CALCLEN:
1502 len = strlen(base_buf);
1503 if (len > *max_len_base)
1504 *max_len_base = len;
1505 if (!is_key_stat(id)) {
1506 len = strlen(comp_buf);
1507 if (len > *max_len_comp)
1508 *max_len_comp = len;
1509 len = strlen(diff_buf);
1510 if (len > *max_len_diff)
1511 *max_len_diff = len;
1514 case RESFMT_TABLE: {
1515 /* string outputs are left-aligned, number outputs are right-aligned */
1516 const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1518 printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1519 if (!is_key_stat(id)) {
1520 printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1521 printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1523 if (i == env.output_spec.spec_cnt - 1)
/* CSV row: only the very first field of the row (base value of column 0)
 * is emitted without a leading comma. The comparison and diff values
 * always follow another field, so they are always comma-separated; this
 * keeps data rows aligned with the CSV header when the first output
 * column is not a key (file/program name) stat.
 */
1528 printf("%s%s", i == 0 ? "" : ",", base_buf);
1529 if (!is_key_stat(id)) {
1530 printf(",%s", comp_buf);
1531 printf(",%s", diff_buf);
1533 if (i == env.output_spec.spec_cnt - 1)
1539 if (last && fmt == RESFMT_TABLE)
1540 output_comp_header_underlines();
1543 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1547 r = strcmp(base->file_name, comp->file_name);
1550 return strcmp(base->prog_name, comp->prog_name);
1553 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1555 static const double eps = 1e-9;
1556 const char *str = NULL;
1559 fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1562 case OP_EQ: return value > f->value - eps && value < f->value + eps;
1563 case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1564 case OP_LT: return value < f->value - eps;
1565 case OP_LE: return value <= f->value + eps;
1566 case OP_GT: return value > f->value + eps;
1567 case OP_GE: return value >= f->value - eps;
1570 fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1574 static bool should_output_join_stats(const struct verif_stats_join *stats)
1577 int i, allow_cnt = 0;
1579 for (i = 0; i < env.deny_filter_cnt; i++) {
1580 f = &env.deny_filters[i];
1581 if (f->kind != FILTER_STAT)
1584 if (is_join_stat_filter_matched(f, stats))
1588 for (i = 0; i < env.allow_filter_cnt; i++) {
1589 f = &env.allow_filters[i];
1590 if (f->kind != FILTER_STAT)
1594 if (is_join_stat_filter_matched(f, stats))
1598 /* if there are no stat allowed filters, pass everything through */
1599 return allow_cnt == 0;
1602 static int handle_comparison_mode(void)
1604 struct stat_specs base_specs = {}, comp_specs = {};
1605 struct stat_specs tmp_sort_spec;
1606 enum resfmt cur_fmt;
1607 int err, i, j, last_idx;
1609 if (env.filename_cnt != 2) {
1610 fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
1611 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1615 err = parse_stats_csv(env.filenames[0], &base_specs,
1616 &env.baseline_stats, &env.baseline_stat_cnt);
1618 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1621 err = parse_stats_csv(env.filenames[1], &comp_specs,
1622 &env.prog_stats, &env.prog_stat_cnt);
1624 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1628 /* To keep it simple we validate that the set and order of stats in
1629 * both CSVs are exactly the same. This can be lifted with a bit more
1630 * pre-processing later.
1632 if (base_specs.spec_cnt != comp_specs.spec_cnt) {
1633 fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
1634 env.filenames[0], env.filenames[1],
1635 base_specs.spec_cnt, comp_specs.spec_cnt);
1638 for (i = 0; i < base_specs.spec_cnt; i++) {
1639 if (base_specs.ids[i] != comp_specs.ids[i]) {
1640 fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
1641 env.filenames[0], env.filenames[1],
1642 stat_defs[base_specs.ids[i]].names[0],
1643 stat_defs[comp_specs.ids[i]].names[0]);
1648 /* Replace user-specified sorting spec with file+prog sorting rule to
1649 * be able to join two datasets correctly. Once we are done, we will
1650 * restore the original sort spec.
1652 tmp_sort_spec = env.sort_spec;
1653 env.sort_spec = join_sort_spec;
1654 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1655 qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
1656 env.sort_spec = tmp_sort_spec;
1658 /* Join two datasets together. If baseline and comparison datasets
1659 * have different subset of rows (we match by 'object + prog' as
1660 * a unique key) then assume empty/missing/zero value for rows that
1661 * are missing in the opposite data set.
1664 while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
1665 const struct verif_stats *base, *comp;
1666 struct verif_stats_join *join;
1670 base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
1671 comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
1673 if (!base->file_name || !base->prog_name) {
1674 fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1675 i, env.filenames[0]);
1678 if (!comp->file_name || !comp->prog_name) {
1679 fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1680 j, env.filenames[1]);
1684 tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
1687 env.join_stats = tmp;
1689 join = &env.join_stats[env.join_stat_cnt];
1690 memset(join, 0, sizeof(*join));
1692 r = cmp_stats_key(base, comp);
1694 join->file_name = base->file_name;
1695 join->prog_name = base->prog_name;
1696 join->stats_a = base;
1697 join->stats_b = comp;
1700 } else if (comp == &fallback_stats || r < 0) {
1701 join->file_name = base->file_name;
1702 join->prog_name = base->prog_name;
1703 join->stats_a = base;
1704 join->stats_b = NULL;
1707 join->file_name = comp->file_name;
1708 join->prog_name = comp->prog_name;
1709 join->stats_a = NULL;
1710 join->stats_b = comp;
1713 env.join_stat_cnt += 1;
1716 /* now sort joined results accorsing to sort spec */
1717 qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
1719 /* for human-readable table output we need to do extra pass to
1720 * calculate column widths, so we substitute current output format
1721 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
1722 * and do everything again.
1724 if (env.out_fmt == RESFMT_TABLE)
1725 cur_fmt = RESFMT_TABLE_CALCLEN;
1727 cur_fmt = env.out_fmt;
1730 output_comp_headers(cur_fmt);
1732 for (i = 0; i < env.join_stat_cnt; i++) {
1733 const struct verif_stats_join *join = &env.join_stats[i];
1735 if (!should_output_join_stats(join))
1738 if (cur_fmt == RESFMT_TABLE_CALCLEN)
1741 output_comp_stats(join, cur_fmt, i == last_idx);
1744 if (cur_fmt == RESFMT_TABLE_CALCLEN) {
1745 cur_fmt = RESFMT_TABLE;
1746 goto one_more_time; /* ... this time with feeling */
1752 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
1754 long value = stats->stats[f->stat_id];
1757 case OP_EQ: return value == f->value;
1758 case OP_NEQ: return value != f->value;
1759 case OP_LT: return value < f->value;
1760 case OP_LE: return value <= f->value;
1761 case OP_GT: return value > f->value;
1762 case OP_GE: return value >= f->value;
1765 fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1769 static bool should_output_stats(const struct verif_stats *stats)
1772 int i, allow_cnt = 0;
1774 for (i = 0; i < env.deny_filter_cnt; i++) {
1775 f = &env.deny_filters[i];
1776 if (f->kind != FILTER_STAT)
1779 if (is_stat_filter_matched(f, stats))
1783 for (i = 0; i < env.allow_filter_cnt; i++) {
1784 f = &env.allow_filters[i];
1785 if (f->kind != FILTER_STAT)
1789 if (is_stat_filter_matched(f, stats))
1793 /* if there are no stat allowed filters, pass everything through */
1794 return allow_cnt == 0;
1797 static void output_prog_stats(void)
1799 const struct verif_stats *stats;
1800 int i, last_stat_idx = 0;
1802 if (env.out_fmt == RESFMT_TABLE) {
1803 /* calculate column widths */
1804 output_headers(RESFMT_TABLE_CALCLEN);
1805 for (i = 0; i < env.prog_stat_cnt; i++) {
1806 stats = &env.prog_stats[i];
1807 if (!should_output_stats(stats))
1809 output_stats(stats, RESFMT_TABLE_CALCLEN, false);
1814 /* actually output the table */
1815 output_headers(env.out_fmt);
1816 for (i = 0; i < env.prog_stat_cnt; i++) {
1817 stats = &env.prog_stats[i];
1818 if (!should_output_stats(stats))
1820 output_stats(stats, env.out_fmt, i == last_stat_idx);
1824 static int handle_verif_mode(void)
1828 if (env.filename_cnt == 0) {
1829 fprintf(stderr, "Please provide path to BPF object file!\n\n");
1830 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1834 for (i = 0; i < env.filename_cnt; i++) {
1835 err = process_obj(env.filenames[i]);
1837 fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
1842 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1844 output_prog_stats();
1849 static int handle_replay_mode(void)
1851 struct stat_specs specs = {};
1854 if (env.filename_cnt != 1) {
1855 fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
1856 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1860 err = parse_stats_csv(env.filenames[0], &specs,
1861 &env.prog_stats, &env.prog_stat_cnt);
1863 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1867 qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1869 output_prog_stats();
1874 int main(int argc, char **argv)
1878 if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
1881 if (env.verbose && env.quiet) {
1882 fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
1883 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1886 if (env.verbose && env.log_level == 0)
1889 if (env.output_spec.spec_cnt == 0) {
1890 if (env.out_fmt == RESFMT_CSV)
1891 env.output_spec = default_csv_output_spec;
1893 env.output_spec = default_output_spec;
1895 if (env.sort_spec.spec_cnt == 0)
1896 env.sort_spec = default_sort_spec;
1898 if (env.comparison_mode && env.replay_mode) {
1899 fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
1900 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1904 if (env.comparison_mode)
1905 err = handle_comparison_mode();
1906 else if (env.replay_mode)
1907 err = handle_replay_mode();
1909 err = handle_verif_mode();
1911 free_verif_stats(env.prog_stats, env.prog_stat_cnt);
1912 free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
1913 free(env.join_stats);
1914 for (i = 0; i < env.filename_cnt; i++)
1915 free(env.filenames[i]);
1916 free(env.filenames);
1917 for (i = 0; i < env.allow_filter_cnt; i++) {
1918 free(env.allow_filters[i].any_glob);
1919 free(env.allow_filters[i].file_glob);
1920 free(env.allow_filters[i].prog_glob);
1922 free(env.allow_filters);
1923 for (i = 0; i < env.deny_filter_cnt; i++) {
1924 free(env.deny_filters[i].any_glob);
1925 free(env.deny_filters[i].file_glob);
1926 free(env.deny_filters[i].prog_glob);
1928 free(env.deny_filters);