tools/testing/selftests/bpf/veristat.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
   3 #define _GNU_SOURCE
   4 #include <argp.h>
   5 #include <string.h>
   6 #include <stdlib.h>
   7 #include <linux/compiler.h>
   8 #include <sched.h>
   9 #include <pthread.h>
  10 #include <dirent.h>
  11 #include <signal.h>
  12 #include <fcntl.h>
  13 #include <unistd.h>
  14 #include <sys/time.h>
  15 #include <sys/sysinfo.h>
  16 #include <sys/stat.h>
  17 #include <bpf/libbpf.h>
  18 #include <libelf.h>
  19 #include <gelf.h>
  20 #include <float.h>
  21
  22 enum stat_id {
  23         VERDICT,
  24         DURATION,
  25         TOTAL_INSNS,
  26         TOTAL_STATES,
  27         PEAK_STATES,
  28         MAX_STATES_PER_INSN,
  29         MARK_READ_MAX_LEN,
  30
  31         FILE_NAME,
  32         PROG_NAME,
  33
  34         ALL_STATS_CNT,
  35         NUM_STATS_CNT = FILE_NAME - VERDICT,
  36 };
  37
  38 /* In comparison mode each stat can specify up to four different values:
  39  *   - A side value;
  40  *   - B side value;
  41  *   - absolute diff value;
  42  *   - relative (percentage) diff value.
  43  *
  44  * When specifying stat specs in comparison mode, user can use one of the
  45  * following variant suffixes to specify which exact variant should be used for
  46  * ordering or filtering:
  47  *   - `_a` for A side value;
  48  *   - `_b` for B side value;
  49  *   - `_diff` for absolute diff value;
  50  *   - `_pct` for relative (percentage) diff value.
  51  *
  52  * If no variant suffix is provided, then `_b` (control data) is assumed.
  53  *
  54  * As an example, let's say instructions stat has the following output:
  55  *
  56  * Insns (A)  Insns (B)  Insns   (DIFF)
  57  * ---------  ---------  --------------
  58  * 21547      20920       -627 (-2.91%)
  59  *
  60  * Then:
  61  *   - 21547 is A side value (insns_a);
  62  *   - 20920 is B side value (insns_b);
  63  *   - -627 is absolute diff value (insns_diff);
  64  *   - -2.91% is relative diff value (insns_pct).
  65  *
  66  * For verdict there is no verdict_pct variant.
  67  * For file and program name, _a and _b variants are equivalent and there are
  68  * no _diff or _pct variants.
  69  */
  70 enum stat_variant {
  71         VARIANT_A,
  72         VARIANT_B,
  73         VARIANT_DIFF,
  74         VARIANT_PCT,
  75 };
  76
  77 struct verif_stats {
  78         char *file_name;
  79         char *prog_name;
  80
  81         long stats[NUM_STATS_CNT];
  82 };
  83
  84 /* joined comparison mode stats */
  85 struct verif_stats_join {
  86         char *file_name;
  87         char *prog_name;
  88
  89         const struct verif_stats *stats_a;
  90         const struct verif_stats *stats_b;
  91 };
  92
  93 struct stat_specs {
  94         int spec_cnt;
  95         enum stat_id ids[ALL_STATS_CNT];
  96         enum stat_variant variants[ALL_STATS_CNT];
  97         bool asc[ALL_STATS_CNT];
  98         int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
  99 };
 100
 101 enum resfmt {
 102         RESFMT_TABLE,
 103         RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
 104         RESFMT_CSV,
 105 };
 106
 107 enum filter_kind {
 108         FILTER_NAME,
 109         FILTER_STAT,
 110 };
 111
 112 enum operator_kind {
 113         OP_EQ,          /* == or = */
 114         OP_NEQ,         /* != or <> */
 115         OP_LT,          /* < */
 116         OP_LE,          /* <= */
 117         OP_GT,          /* > */
 118         OP_GE,          /* >= */
 119 };
 120
 121 struct filter {
 122         enum filter_kind kind;
 123         /* FILTER_NAME */
 124         char *any_glob;
 125         char *file_glob;
 126         char *prog_glob;
 127         /* FILTER_STAT */
 128         enum operator_kind op;
 129         int stat_id;
 130         enum stat_variant stat_var;
 131         long value;
 132 };
 133
 134 static struct env {
 135         char **filenames;
 136         int filename_cnt;
 137         bool verbose;
 138         bool debug;
 139         bool quiet;
 140         int log_level;
 141         enum resfmt out_fmt;
 142         bool comparison_mode;
 143         bool replay_mode;
 144
 145         struct verif_stats *prog_stats;
 146         int prog_stat_cnt;
 147
 148         /* baseline_stats is allocated and used only in comparison mode */
 149         struct verif_stats *baseline_stats;
 150         int baseline_stat_cnt;
 151
 152         struct verif_stats_join *join_stats;
 153         int join_stat_cnt;
 154
 155         struct stat_specs output_spec;
 156         struct stat_specs sort_spec;
 157
 158         struct filter *allow_filters;
 159         struct filter *deny_filters;
 160         int allow_filter_cnt;
 161         int deny_filter_cnt;
 162
 163         int files_processed;
 164         int files_skipped;
 165         int progs_processed;
 166         int progs_skipped;
 167 } env;
 168
 169 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 170 {
 171         if (!env.verbose)
 172                 return 0;
 173         if (level == LIBBPF_DEBUG  && !env.debug)
 174                 return 0;
 175         return vfprintf(stderr, format, args);
 176 }
 177
 178 const char *argp_program_version = "veristat";
 179 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
 180 const char argp_program_doc[] =
 181 "veristat    BPF verifier stats collection and comparison tool.\n"
 182 "\n"
 183 "USAGE: veristat <obj-file> [<obj-file>...]\n"
 184 "   OR: veristat -C <baseline.csv> <comparison.csv>\n";
 185
 186 static const struct argp_option opts[] = {
 187         { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
 188         { "verbose", 'v', NULL, 0, "Verbose mode" },
 189         { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
 190         { "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
 191         { "quiet", 'q', NULL, 0, "Quiet mode" },
 192         { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
 193         { "sort", 's', "SPEC", 0, "Specify sort order" },
 194         { "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
 195         { "compare", 'C', NULL, 0, "Comparison mode" },
 196         { "replay", 'R', NULL, 0, "Replay mode" },
 197         { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
 198         {},
 199 };
 200
 201 static int parse_stats(const char *stats_str, struct stat_specs *specs);
 202 static int append_filter(struct filter **filters, int *cnt, const char *str);
 203 static int append_filter_file(const char *path);
 204
 205 static error_t parse_arg(int key, char *arg, struct argp_state *state)
 206 {
 207         void *tmp;
 208         int err;
 209
 210         switch (key) {
 211         case 'h':
 212                 argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
 213                 break;
 214         case 'v':
 215                 env.verbose = true;
 216                 break;
 217         case 'd':
 218                 env.debug = true;
 219                 env.verbose = true;
 220                 break;
 221         case 'q':
 222                 env.quiet = true;
 223                 break;
 224         case 'e':
 225                 err = parse_stats(arg, &env.output_spec);
 226                 if (err)
 227                         return err;
 228                 break;
 229         case 's':
 230                 err = parse_stats(arg, &env.sort_spec);
 231                 if (err)
 232                         return err;
 233                 break;
 234         case 'o':
 235                 if (strcmp(arg, "table") == 0) {
 236                         env.out_fmt = RESFMT_TABLE;
 237                 } else if (strcmp(arg, "csv") == 0) {
 238                         env.out_fmt = RESFMT_CSV;
 239                 } else {
 240                         fprintf(stderr, "Unrecognized output format '%s'\n", arg);
 241                         return -EINVAL;
 242                 }
 243                 break;
 244         case 'l':
 245                 errno = 0;
 246                 env.log_level = strtol(arg, NULL, 10);
 247                 if (errno) {
 248                         fprintf(stderr, "invalid log level: %s\n", arg);
 249                         argp_usage(state);
 250                 }
 251                 break;
 252         case 'C':
 253                 env.comparison_mode = true;
 254                 break;
 255         case 'R':
 256                 env.replay_mode = true;
 257                 break;
 258         case 'f':
 259                 if (arg[0] == '@')
 260                         err = append_filter_file(arg + 1);
 261                 else if (arg[0] == '!')
 262                         err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
 263                 else
 264                         err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
 265                 if (err) {
 266                         fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
 267                         return err;
 268                 }
 269                 break;
 270         case ARGP_KEY_ARG:
 271                 tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
 272                 if (!tmp)
 273                         return -ENOMEM;
 274                 env.filenames = tmp;
 275                 env.filenames[env.filename_cnt] = strdup(arg);
 276                 if (!env.filenames[env.filename_cnt])
 277                         return -ENOMEM;
 278                 env.filename_cnt++;
 279                 break;
 280         default:
 281                 return ARGP_ERR_UNKNOWN;
 282         }
 283         return 0;
 284 }
 285
 286 static const struct argp argp = {
 287         .options = opts,
 288         .parser = parse_arg,
 289         .doc = argp_program_doc,
 290 };
 291
 292
 293 /* Adapted from perf/util/string.c */
 294 static bool glob_matches(const char *str, const char *pat)
 295 {
 296         while (*str && *pat && *pat != '*') {
 297                 if (*str != *pat)
 298                         return false;
 299                 str++;
 300                 pat++;
 301         }
 302         /* Check wild card */
 303         if (*pat == '*') {
 304                 while (*pat == '*')
 305                         pat++;
 306                 if (!*pat) /* Tail wild card matches all */
 307                         return true;
 308                 while (*str)
 309                         if (glob_matches(str++, pat))
 310                                 return true;
 311         }
 312         return !*str && !*pat;
 313 }
 314
 315 static bool is_bpf_obj_file(const char *path) {
 316         Elf64_Ehdr *ehdr;
 317         int fd, err = -EINVAL;
 318         Elf *elf = NULL;
 319
 320         fd = open(path, O_RDONLY | O_CLOEXEC);
 321         if (fd < 0)
 322                 return true; /* we'll fail later and propagate error */
 323
 324         /* ensure libelf is initialized */
 325         (void)elf_version(EV_CURRENT);
 326
 327         elf = elf_begin(fd, ELF_C_READ, NULL);
 328         if (!elf)
 329                 goto cleanup;
 330
 331         if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
 332                 goto cleanup;
 333
 334         ehdr = elf64_getehdr(elf);
 335         /* Old LLVM set e_machine to EM_NONE */
 336         if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
 337                 goto cleanup;
 338
 339         err = 0;
 340 cleanup:
 341         if (elf)
 342                 elf_end(elf);
 343         close(fd);
 344         return err == 0;
 345 }
 346
 347 static bool should_process_file_prog(const char *filename, const char *prog_name)
 348 {
 349         struct filter *f;
 350         int i, allow_cnt = 0;
 351
 352         for (i = 0; i < env.deny_filter_cnt; i++) {
 353                 f = &env.deny_filters[i];
 354                 if (f->kind != FILTER_NAME)
 355                         continue;
 356
 357                 if (f->any_glob && glob_matches(filename, f->any_glob))
 358                         return false;
 359                 if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
 360                         return false;
 361                 if (f->file_glob && glob_matches(filename, f->file_glob))
 362                         return false;
 363                 if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
 364                         return false;
 365         }
 366
 367         for (i = 0; i < env.allow_filter_cnt; i++) {
 368                 f = &env.allow_filters[i];
 369                 if (f->kind != FILTER_NAME)
 370                         continue;
 371
 372                 allow_cnt++;
 373                 if (f->any_glob) {
 374                         if (glob_matches(filename, f->any_glob))
 375                                 return true;
 376                         /* If we don't know program name yet, any_glob filter
 377                          * has to assume that current BPF object file might be
 378                          * relevant; we'll check again later on after opening
 379                          * BPF object file, at which point program name will
 380                          * be known finally.
 381                          */
 382                         if (!prog_name || glob_matches(prog_name, f->any_glob))
 383                                 return true;
 384                 } else {
 385                         if (f->file_glob && !glob_matches(filename, f->file_glob))
 386                                 continue;
 387                         if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
 388                                 continue;
 389                         return true;
 390                 }
 391         }
 392
 393         /* if there are no file/prog name allow filters, allow all progs,
 394          * unless they are denied earlier explicitly
 395          */
 396         return allow_cnt == 0;
 397 }
 398
 399 static struct {
 400         enum operator_kind op_kind;
 401         const char *op_str;
 402 } operators[] = {
 403         /* Order of these definitions matter to avoid situations like '<'
 404          * matching part of what is actually a '<>' operator. That is,
 405          * substrings should go last.
 406          */
 407         { OP_EQ, "==" },
 408         { OP_NEQ, "!=" },
 409         { OP_NEQ, "<>" },
 410         { OP_LE, "<=" },
 411         { OP_LT, "<" },
 412         { OP_GE, ">=" },
 413         { OP_GT, ">" },
 414         { OP_EQ, "=" },
 415 };
 416
 417 static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var);
 418
 419 static int append_filter(struct filter **filters, int *cnt, const char *str)
 420 {
 421         struct filter *f;
 422         void *tmp;
 423         const char *p;
 424         int i;
 425
 426         tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
 427         if (!tmp)
 428                 return -ENOMEM;
 429         *filters = tmp;
 430
 431         f = &(*filters)[*cnt];
 432         memset(f, 0, sizeof(*f));
 433
 434         /* First, let's check if it's a stats filter of the following form:
 435          * <stat><op><value, where:
 436          *   - <stat> is one of supported numerical stats (verdict is also
 437          *     considered numerical, failure == 0, success == 1);
 438          *   - <op> is comparison operator (see `operators` definitions);
 439          *   - <value> is an integer (or failure/success, or false/true as
 440          *     special aliases for 0 and 1, respectively).
 441          * If the form doesn't match what user provided, we assume file/prog
 442          * glob filter.
 443          */
 444         for (i = 0; i < ARRAY_SIZE(operators); i++) {
 445                 enum stat_variant var;
 446                 int id;
 447                 long val;
 448                 const char *end = str;
 449                 const char *op_str;
 450
 451                 op_str = operators[i].op_str;
 452                 p = strstr(str, op_str);
 453                 if (!p)
 454                         continue;
 455
 456                 if (!parse_stat_id_var(str, p - str, &id, &var)) {
 457                         fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
 458                         return -EINVAL;
 459                 }
 460                 if (id >= FILE_NAME) {
 461                         fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
 462                         return -EINVAL;
 463                 }
 464
 465                 p += strlen(op_str);
 466
 467                 if (strcasecmp(p, "true") == 0 ||
 468                     strcasecmp(p, "t") == 0 ||
 469                     strcasecmp(p, "success") == 0 ||
 470                     strcasecmp(p, "succ") == 0 ||
 471                     strcasecmp(p, "s") == 0 ||
 472                     strcasecmp(p, "match") == 0 ||
 473                     strcasecmp(p, "m") == 0) {
 474                         val = 1;
 475                 } else if (strcasecmp(p, "false") == 0 ||
 476                            strcasecmp(p, "f") == 0 ||
 477                            strcasecmp(p, "failure") == 0 ||
 478                            strcasecmp(p, "fail") == 0 ||
 479                            strcasecmp(p, "mismatch") == 0 ||
 480                            strcasecmp(p, "mis") == 0) {
 481                         val = 0;
 482                 } else {
 483                         errno = 0;
 484                         val = strtol(p, (char **)&end, 10);
 485                         if (errno || end == p || *end != '\0' ) {
 486                                 fprintf(stderr, "Invalid integer value in '%s'!\n", str);
 487                                 return -EINVAL;
 488                         }
 489                 }
 490
 491                 f->kind = FILTER_STAT;
 492                 f->stat_id = id;
 493                 f->stat_var = var;
 494                 f->op = operators[i].op_kind;
 495                 f->value = val;
 496
 497                 *cnt += 1;
 498                 return 0;
 499         }
 500
 501         /* File/prog filter can be specified either as '<glob>' or
 502          * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
 503          * both file and program names. This seems to be way more useful in
 504          * practice. If user needs full control, they can use '/<prog-glob>'
 505          * form to glob just program name, or '<file-glob>/' to glob only file
 506          * name. But usually common <glob> seems to be the most useful and
 507          * ergonomic way.
 508          */
 509         f->kind = FILTER_NAME;
 510         p = strchr(str, '/');
 511         if (!p) {
 512                 f->any_glob = strdup(str);
 513                 if (!f->any_glob)
 514                         return -ENOMEM;
 515         } else {
 516                 if (str != p) {
 517                         /* non-empty file glob */
 518                         f->file_glob = strndup(str, p - str);
 519                         if (!f->file_glob)
 520                                 return -ENOMEM;
 521                 }
 522                 if (strlen(p + 1) > 0) {
 523                         /* non-empty prog glob */
 524                         f->prog_glob = strdup(p + 1);
 525                         if (!f->prog_glob) {
 526                                 free(f->file_glob);
 527                                 f->file_glob = NULL;
 528                                 return -ENOMEM;
 529                         }
 530                 }
 531         }
 532
 533         *cnt += 1;
 534         return 0;
 535 }
 536
 537 static int append_filter_file(const char *path)
 538 {
 539         char buf[1024];
 540         FILE *f;
 541         int err = 0;
 542
 543         f = fopen(path, "r");
 544         if (!f) {
 545                 err = -errno;
 546                 fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
 547                 return err;
 548         }
 549
 550         while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
 551                 /* lines starting with # are comments, skip them */
 552                 if (buf[0] == '\0' || buf[0] == '#')
 553                         continue;
 554                 /* lines starting with ! are negative match filters */
 555                 if (buf[0] == '!')
 556                         err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
 557                 else
 558                         err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
 559                 if (err)
 560                         goto cleanup;
 561         }
 562
 563 cleanup:
 564         fclose(f);
 565         return err;
 566 }
 567
 568 static const struct stat_specs default_output_spec = {
 569         .spec_cnt = 7,
 570         .ids = {
 571                 FILE_NAME, PROG_NAME, VERDICT, DURATION,
 572                 TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
 573         },
 574 };
 575
 576 static const struct stat_specs default_csv_output_spec = {
 577         .spec_cnt = 9,
 578         .ids = {
 579                 FILE_NAME, PROG_NAME, VERDICT, DURATION,
 580                 TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
 581                 MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
 582         },
 583 };
 584
 585 static const struct stat_specs default_sort_spec = {
 586         .spec_cnt = 2,
 587         .ids = {
 588                 FILE_NAME, PROG_NAME,
 589         },
 590         .asc = { true, true, },
 591 };
 592
 593 /* sorting for comparison mode to join two data sets */
 594 static const struct stat_specs join_sort_spec = {
 595         .spec_cnt = 2,
 596         .ids = {
 597                 FILE_NAME, PROG_NAME,
 598         },
 599         .asc = { true, true, },
 600 };
 601
 602 static struct stat_def {
 603         const char *header;
 604         const char *names[4];
 605         bool asc_by_default;
 606         bool left_aligned;
 607 } stat_defs[] = {
 608         [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
 609         [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
 610         [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
 611         [DURATION] = { "Duration (us)", {"duration", "dur"}, },
 612         [TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
 613         [TOTAL_STATES] = { "States", {"total_states", "states"}, },
 614         [PEAK_STATES] = { "Peak states", {"peak_states"}, },
 615         [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
 616         [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
 617 };
 618
 619 static bool parse_stat_id_var(const char *name, size_t len, int *id, enum stat_variant *var)
 620 {
 621         static const char *var_sfxs[] = {
 622                 [VARIANT_A] = "_a",
 623                 [VARIANT_B] = "_b",
 624                 [VARIANT_DIFF] = "_diff",
 625                 [VARIANT_PCT] = "_pct",
 626         };
 627         int i, j, k;
 628
 629         for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
 630                 struct stat_def *def = &stat_defs[i];
 631                 size_t alias_len, sfx_len;
 632                 const char *alias;
 633
 634                 for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
 635                         alias = def->names[j];
 636                         if (!alias)
 637                                 continue;
 638
 639                         alias_len = strlen(alias);
 640                         if (strncmp(name, alias, alias_len) != 0)
 641                                 continue;
 642
 643                         if (alias_len == len) {
 644                                 /* If no variant suffix is specified, we
 645                                  * assume control group (just in case we are
 646                                  * in comparison mode. Variant is ignored in
 647                                  * non-comparison mode.
 648                                  */
 649                                 *var = VARIANT_B;
 650                                 *id = i;
 651                                 return true;
 652                         }
 653
 654                         for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
 655                                 sfx_len = strlen(var_sfxs[k]);
 656                                 if (alias_len + sfx_len != len)
 657                                         continue;
 658
 659                                 if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
 660                                         *var = (enum stat_variant)k;
 661                                         *id = i;
 662                                         return true;
 663                                 }
 664                         }
 665                 }
 666         }
 667
 668         return false;
 669 }
 670
 671 static bool is_asc_sym(char c)
 672 {
 673         return c == '^';
 674 }
 675
 676 static bool is_desc_sym(char c)
 677 {
 678         return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
 679 }
 680
 681 static int parse_stat(const char *stat_name, struct stat_specs *specs)
 682 {
 683         int id;
 684         bool has_order = false, is_asc = false;
 685         size_t len = strlen(stat_name);
 686         enum stat_variant var;
 687
 688         if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
 689                 fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
 690                 return -E2BIG;
 691         }
 692
 693         if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
 694                 has_order = true;
 695                 is_asc = is_asc_sym(stat_name[len - 1]);
 696                 len -= 1;
 697         }
 698
 699         if (!parse_stat_id_var(stat_name, len, &id, &var)) {
 700                 fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
 701                 return -ESRCH;
 702         }
 703
 704         specs->ids[specs->spec_cnt] = id;
 705         specs->variants[specs->spec_cnt] = var;
 706         specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
 707         specs->spec_cnt++;
 708
 709         return 0;
 710 }
 711
 712 static int parse_stats(const char *stats_str, struct stat_specs *specs)
 713 {
 714         char *input, *state = NULL, *next;
 715         int err;
 716
 717         input = strdup(stats_str);
 718         if (!input)
 719                 return -ENOMEM;
 720
 721         while ((next = strtok_r(state ? NULL : input, ",", &state))) {
 722                 err = parse_stat(next, specs);
 723                 if (err)
 724                         return err;
 725         }
 726
 727         return 0;
 728 }
 729
 730 static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
 731 {
 732         int i;
 733
 734         if (!stats)
 735                 return;
 736
 737         for (i = 0; i < stat_cnt; i++) {
 738                 free(stats[i].file_name);
 739                 free(stats[i].prog_name);
 740         }
 741         free(stats);
 742 }
 743
 744 static char verif_log_buf[64 * 1024];
 745
 746 #define MAX_PARSED_LOG_LINES 100
 747
 748 static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
 749 {
 750         const char *cur;
 751         int pos, lines;
 752
 753         buf[buf_sz - 1] = '\0';
 754
 755         for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
 756                 /* find previous endline or otherwise take the start of log buf */
 757                 for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
 758                 }
 759                 /* next time start from end of previous line (or pos goes to <0) */
 760                 pos--;
 761                 /* if we found endline, point right after endline symbol;
 762                  * otherwise, stay at the beginning of log buf
 763                  */
 764                 if (cur[0] == '\n')
 765                         cur++;
 766
 767                 if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
 768                         continue;
 769                 if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
 770                                 &s->stats[TOTAL_INSNS],
 771                                 &s->stats[MAX_STATES_PER_INSN],
 772                                 &s->stats[TOTAL_STATES],
 773                                 &s->stats[PEAK_STATES],
 774                                 &s->stats[MARK_READ_MAX_LEN]))
 775                         continue;
 776         }
 777
 778         return 0;
 779 }
 780
 781 static void fixup_obj(struct bpf_object *obj)
 782 {
 783         struct bpf_map *map;
 784
 785         bpf_object__for_each_map(map, obj) {
 786                 /* disable pinning */
 787                 bpf_map__set_pin_path(map, NULL);
 788
 789                 /* fix up map size, if necessary */
 790                 switch (bpf_map__type(map)) {
 791                 case BPF_MAP_TYPE_SK_STORAGE:
 792                 case BPF_MAP_TYPE_TASK_STORAGE:
 793                 case BPF_MAP_TYPE_INODE_STORAGE:
 794                 case BPF_MAP_TYPE_CGROUP_STORAGE:
 795                         break;
 796                 default:
 797                         if (bpf_map__max_entries(map) == 0)
 798                                 bpf_map__set_max_entries(map, 1);
 799                 }
 800         }
 801 }
 802
 803 static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
 804 {
 805         const char *prog_name = bpf_program__name(prog);
 806         size_t buf_sz = sizeof(verif_log_buf);
 807         char *buf = verif_log_buf;
 808         struct verif_stats *stats;
 809         int err = 0;
 810         void *tmp;
 811
 812         if (!should_process_file_prog(basename(filename), bpf_program__name(prog))) {
 813                 env.progs_skipped++;
 814                 return 0;
 815         }
 816
 817         tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
 818         if (!tmp)
 819                 return -ENOMEM;
 820         env.prog_stats = tmp;
 821         stats = &env.prog_stats[env.prog_stat_cnt++];
 822         memset(stats, 0, sizeof(*stats));
 823
 824         if (env.verbose) {
 825                 buf_sz = 16 * 1024 * 1024;
 826                 buf = malloc(buf_sz);
 827                 if (!buf)
 828                         return -ENOMEM;
 829                 bpf_program__set_log_buf(prog, buf, buf_sz);
 830                 bpf_program__set_log_level(prog, env.log_level | 4); /* stats + log */
 831         } else {
 832                 bpf_program__set_log_buf(prog, buf, buf_sz);
 833                 bpf_program__set_log_level(prog, 4); /* only verifier stats */
 834         }
 835         verif_log_buf[0] = '\0';
 836
 837         /* increase chances of successful BPF object loading */
 838         fixup_obj(obj);
 839
 840         err = bpf_object__load(obj);
 841         env.progs_processed++;
 842
 843         stats->file_name = strdup(basename(filename));
 844         stats->prog_name = strdup(bpf_program__name(prog));
 845         stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
 846         parse_verif_log(buf, buf_sz, stats);
 847
 848         if (env.verbose) {
 849                 printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
 850                        filename, prog_name, stats->stats[DURATION],
 851                        err ? "failure" : "success", buf);
 852         }
 853
 854         if (verif_log_buf != buf)
 855                 free(buf);
 856
 857         return 0;
 858 };
 859
 860 static int process_obj(const char *filename)
 861 {
 862         struct bpf_object *obj = NULL, *tobj;
 863         struct bpf_program *prog, *tprog, *lprog;
 864         libbpf_print_fn_t old_libbpf_print_fn;
 865         LIBBPF_OPTS(bpf_object_open_opts, opts);
 866         int err = 0, prog_cnt = 0;
 867
 868         if (!should_process_file_prog(basename(filename), NULL)) {
 869                 if (env.verbose)
 870                         printf("Skipping '%s' due to filters...\n", filename);
 871                 env.files_skipped++;
 872                 return 0;
 873         }
 874         if (!is_bpf_obj_file(filename)) {
 875                 if (env.verbose)
 876                         printf("Skipping '%s' as it's not a BPF object file...\n", filename);
 877                 env.files_skipped++;
 878                 return 0;
 879         }
 880
 881         if (!env.quiet && env.out_fmt == RESFMT_TABLE)
 882                 printf("Processing '%s'...\n", basename(filename));
 883
 884         old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
 885         obj = bpf_object__open_file(filename, &opts);
 886         if (!obj) {
 887                 /* if libbpf can't open BPF object file, it could be because
 888                  * that BPF object file is incomplete and has to be statically
 889                  * linked into a final BPF object file; instead of bailing
 890                  * out, report it into stderr, mark it as skipped, and
 891                  * proceed
 892                  */
 893                 fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
 894                 env.files_skipped++;
 895                 err = 0;
 896                 goto cleanup;
 897         }
 898
 899         env.files_processed++;
 900
 901         bpf_object__for_each_program(prog, obj) {
 902                 prog_cnt++;
 903         }
 904
 905         if (prog_cnt == 1) {
 906                 prog = bpf_object__next_program(obj, NULL);
 907                 bpf_program__set_autoload(prog, true);
 908                 process_prog(filename, obj, prog);
 909                 goto cleanup;
 910         }
 911
 912         bpf_object__for_each_program(prog, obj) {
 913                 const char *prog_name = bpf_program__name(prog);
 914
 915                 tobj = bpf_object__open_file(filename, &opts);
 916                 if (!tobj) {
 917                         err = -errno;
 918                         fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
 919                         goto cleanup;
 920                 }
 921
 922                 bpf_object__for_each_program(tprog, tobj) {
 923                         const char *tprog_name = bpf_program__name(tprog);
 924
 925                         if (strcmp(prog_name, tprog_name) == 0) {
 926                                 bpf_program__set_autoload(tprog, true);
 927                                 lprog = tprog;
 928                         } else {
 929                                 bpf_program__set_autoload(tprog, false);
 930                         }
 931                 }
 932
 933                 process_prog(filename, tobj, lprog);
 934                 bpf_object__close(tobj);
 935         }
 936
 937 cleanup:
 938         bpf_object__close(obj);
 939         libbpf_set_print(old_libbpf_print_fn);
 940         return err;
 941 }
 942
 943 static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
 944                     enum stat_id id, bool asc)
 945 {
 946         int cmp = 0;
 947
 948         switch (id) {
 949         case FILE_NAME:
 950                 cmp = strcmp(s1->file_name, s2->file_name);
 951                 break;
 952         case PROG_NAME:
 953                 cmp = strcmp(s1->prog_name, s2->prog_name);
 954                 break;
 955         case VERDICT:
 956         case DURATION:
 957         case TOTAL_INSNS:
 958         case TOTAL_STATES:
 959         case PEAK_STATES:
 960         case MAX_STATES_PER_INSN:
 961         case MARK_READ_MAX_LEN: {
 962                 long v1 = s1->stats[id];
 963                 long v2 = s2->stats[id];
 964
 965                 if (v1 != v2)
 966                         cmp = v1 < v2 ? -1 : 1;
 967                 break;
 968         }
 969         default:
 970                 fprintf(stderr, "Unrecognized stat #%d\n", id);
 971                 exit(1);
 972         }
 973
 974         return asc ? cmp : -cmp;
 975 }
 976
 977 static int cmp_prog_stats(const void *v1, const void *v2)
 978 {
 979         const struct verif_stats *s1 = v1, *s2 = v2;
 980         int i, cmp;
 981
 982         for (i = 0; i < env.sort_spec.spec_cnt; i++) {
 983                 cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
 984                 if (cmp != 0)
 985                         return cmp;
 986         }
 987
 988         /* always disambiguate with file+prog, which are unique */
 989         cmp = strcmp(s1->file_name, s2->file_name);
 990         if (cmp != 0)
 991                 return cmp;
 992         return strcmp(s1->prog_name, s2->prog_name);
 993 }
 994
 995 static void fetch_join_stat_value(const struct verif_stats_join *s,
 996                                   enum stat_id id, enum stat_variant var,
 997                                   const char **str_val,
 998                                   double *num_val)
 999 {
1000         long v1, v2;
1001
1002         if (id == FILE_NAME) {
1003                 *str_val = s->file_name;
1004                 return;
1005         }
1006         if (id == PROG_NAME) {
1007                 *str_val = s->prog_name;
1008                 return;
1009         }
1010
1011         v1 = s->stats_a ? s->stats_a->stats[id] : 0;
1012         v2 = s->stats_b ? s->stats_b->stats[id] : 0;
1013
1014         switch (var) {
1015         case VARIANT_A:
1016                 if (!s->stats_a)
1017                         *num_val = -DBL_MAX;
1018                 else
1019                         *num_val = s->stats_a->stats[id];
1020                 return;
1021         case VARIANT_B:
1022                 if (!s->stats_b)
1023                         *num_val = -DBL_MAX;
1024                 else
1025                         *num_val = s->stats_b->stats[id];
1026                 return;
1027         case VARIANT_DIFF:
1028                 if (!s->stats_a || !s->stats_b)
1029                         *num_val = -DBL_MAX;
1030                 else if (id == VERDICT)
1031                         *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
1032                 else
1033                         *num_val = (double)(v2 - v1);
1034                 return;
1035         case VARIANT_PCT:
1036                 if (!s->stats_a || !s->stats_b) {
1037                         *num_val = -DBL_MAX;
1038                 } else if (v1 == 0) {
1039                         if (v1 == v2)
1040                                 *num_val = 0.0;
1041                         else
1042                                 *num_val = v2 < v1 ? -100.0 : 100.0;
1043                 } else {
1044                          *num_val = (v2 - v1) * 100.0 / v1;
1045                 }
1046                 return;
1047         }
1048 }
1049
1050 static int cmp_join_stat(const struct verif_stats_join *s1,
1051                          const struct verif_stats_join *s2,
1052                          enum stat_id id, enum stat_variant var, bool asc)
1053 {
1054         const char *str1 = NULL, *str2 = NULL;
1055         double v1, v2;
1056         int cmp = 0;
1057
1058         fetch_join_stat_value(s1, id, var, &str1, &v1);
1059         fetch_join_stat_value(s2, id, var, &str2, &v2);
1060
1061         if (str1)
1062                 cmp = strcmp(str1, str2);
1063         else if (v1 != v2)
1064                 cmp = v1 < v2 ? -1 : 1;
1065
1066         return asc ? cmp : -cmp;
1067 }
1068
1069 static int cmp_join_stats(const void *v1, const void *v2)
1070 {
1071         const struct verif_stats_join *s1 = v1, *s2 = v2;
1072         int i, cmp;
1073
1074         for (i = 0; i < env.sort_spec.spec_cnt; i++) {
1075                 cmp = cmp_join_stat(s1, s2,
1076                                     env.sort_spec.ids[i],
1077                                     env.sort_spec.variants[i],
1078                                     env.sort_spec.asc[i]);
1079                 if (cmp != 0)
1080                         return cmp;
1081         }
1082
1083         /* always disambiguate with file+prog, which are unique */
1084         cmp = strcmp(s1->file_name, s2->file_name);
1085         if (cmp != 0)
1086                 return cmp;
1087         return strcmp(s1->prog_name, s2->prog_name);
1088 }
1089
1090 #define HEADER_CHAR '-'
1091 #define COLUMN_SEP "  "
1092
1093 static void output_header_underlines(void)
1094 {
1095         int i, j, len;
1096
1097         for (i = 0; i < env.output_spec.spec_cnt; i++) {
1098                 len = env.output_spec.lens[i];
1099
1100                 printf("%s", i == 0 ? "" : COLUMN_SEP);
1101                 for (j = 0; j < len; j++)
1102                         printf("%c", HEADER_CHAR);
1103         }
1104         printf("\n");
1105 }
1106
1107 static void output_headers(enum resfmt fmt)
1108 {
1109         const char *fmt_str;
1110         int i, len;
1111
1112         for (i = 0; i < env.output_spec.spec_cnt; i++) {
1113                 int id = env.output_spec.ids[i];
1114                 int *max_len = &env.output_spec.lens[i];
1115
1116                 switch (fmt) {
1117                 case RESFMT_TABLE_CALCLEN:
1118                         len = snprintf(NULL, 0, "%s", stat_defs[id].header);
1119                         if (len > *max_len)
1120                                 *max_len = len;
1121                         break;
1122                 case RESFMT_TABLE:
1123                         fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
1124                         printf(fmt_str, i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
1125                         if (i == env.output_spec.spec_cnt - 1)
1126                                 printf("\n");
1127                         break;
1128                 case RESFMT_CSV:
1129                         printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
1130                         if (i == env.output_spec.spec_cnt - 1)
1131                                 printf("\n");
1132                         break;
1133                 }
1134         }
1135
1136         if (fmt == RESFMT_TABLE)
1137                 output_header_underlines();
1138 }
1139
1140 static void prepare_value(const struct verif_stats *s, enum stat_id id,
1141                           const char **str, long *val)
1142 {
1143         switch (id) {
1144         case FILE_NAME:
1145                 *str = s ? s->file_name : "N/A";
1146                 break;
1147         case PROG_NAME:
1148                 *str = s ? s->prog_name : "N/A";
1149                 break;
1150         case VERDICT:
1151                 if (!s)
1152                         *str = "N/A";
1153                 else
1154                         *str = s->stats[VERDICT] ? "success" : "failure";
1155                 break;
1156         case DURATION:
1157         case TOTAL_INSNS:
1158         case TOTAL_STATES:
1159         case PEAK_STATES:
1160         case MAX_STATES_PER_INSN:
1161         case MARK_READ_MAX_LEN:
1162                 *val = s ? s->stats[id] : 0;
1163                 break;
1164         default:
1165                 fprintf(stderr, "Unrecognized stat #%d\n", id);
1166                 exit(1);
1167         }
1168 }
1169
1170 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
1171 {
1172         int i;
1173
1174         for (i = 0; i < env.output_spec.spec_cnt; i++) {
1175                 int id = env.output_spec.ids[i];
1176                 int *max_len = &env.output_spec.lens[i], len;
1177                 const char *str = NULL;
1178                 long val = 0;
1179
1180                 prepare_value(s, id, &str, &val);
1181
1182                 switch (fmt) {
1183                 case RESFMT_TABLE_CALCLEN:
1184                         if (str)
1185                                 len = snprintf(NULL, 0, "%s", str);
1186                         else
1187                                 len = snprintf(NULL, 0, "%ld", val);
1188                         if (len > *max_len)
1189                                 *max_len = len;
1190                         break;
1191                 case RESFMT_TABLE:
1192                         if (str)
1193                                 printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
1194                         else
1195                                 printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
1196                         if (i == env.output_spec.spec_cnt - 1)
1197                                 printf("\n");
1198                         break;
1199                 case RESFMT_CSV:
1200                         if (str)
1201                                 printf("%s%s", i == 0 ? "" : ",", str);
1202                         else
1203                                 printf("%s%ld", i == 0 ? "" : ",", val);
1204                         if (i == env.output_spec.spec_cnt - 1)
1205                                 printf("\n");
1206                         break;
1207                 }
1208         }
1209
1210         if (last && fmt == RESFMT_TABLE) {
1211                 output_header_underlines();
1212                 printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
1213                        env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
1214         }
1215 }
1216
1217 static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
1218 {
1219         switch (id) {
1220         case FILE_NAME:
1221                 st->file_name = strdup(str);
1222                 if (!st->file_name)
1223                         return -ENOMEM;
1224                 break;
1225         case PROG_NAME:
1226                 st->prog_name = strdup(str);
1227                 if (!st->prog_name)
1228                         return -ENOMEM;
1229                 break;
1230         case VERDICT:
1231                 if (strcmp(str, "success") == 0) {
1232                         st->stats[VERDICT] = true;
1233                 } else if (strcmp(str, "failure") == 0) {
1234                         st->stats[VERDICT] = false;
1235                 } else {
1236                         fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
1237                         return -EINVAL;
1238                 }
1239                 break;
1240         case DURATION:
1241         case TOTAL_INSNS:
1242         case TOTAL_STATES:
1243         case PEAK_STATES:
1244         case MAX_STATES_PER_INSN:
1245         case MARK_READ_MAX_LEN: {
1246                 long val;
1247                 int err, n;
1248
1249                 if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
1250                         err = -errno;
1251                         fprintf(stderr, "Failed to parse '%s' as integer\n", str);
1252                         return err;
1253                 }
1254
1255                 st->stats[id] = val;
1256                 break;
1257         }
1258         default:
1259                 fprintf(stderr, "Unrecognized stat #%d\n", id);
1260                 return -EINVAL;
1261         }
1262         return 0;
1263 }
1264
1265 static int parse_stats_csv(const char *filename, struct stat_specs *specs,
1266                            struct verif_stats **statsp, int *stat_cntp)
1267 {
1268         char line[4096];
1269         FILE *f;
1270         int err = 0;
1271         bool header = true;
1272
1273         f = fopen(filename, "r");
1274         if (!f) {
1275                 err = -errno;
1276                 fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
1277                 return err;
1278         }
1279
1280         *stat_cntp = 0;
1281
1282         while (fgets(line, sizeof(line), f)) {
1283                 char *input = line, *state = NULL, *next;
1284                 struct verif_stats *st = NULL;
1285                 int col = 0;
1286
1287                 if (!header) {
1288                         void *tmp;
1289
1290                         tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
1291                         if (!tmp) {
1292                                 err = -ENOMEM;
1293                                 goto cleanup;
1294                         }
1295                         *statsp = tmp;
1296
1297                         st = &(*statsp)[*stat_cntp];
1298                         memset(st, 0, sizeof(*st));
1299
1300                         *stat_cntp += 1;
1301                 }
1302
1303                 while ((next = strtok_r(state ? NULL : input, ",\n", &state))) {
1304                         if (header) {
1305                                 /* for the first line, set up spec stats */
1306                                 err = parse_stat(next, specs);
1307                                 if (err)
1308                                         goto cleanup;
1309                                 continue;
1310                         }
1311
1312                         /* for all other lines, parse values based on spec */
1313                         if (col >= specs->spec_cnt) {
1314                                 fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
1315                                         col, *stat_cntp, filename);
1316                                 err = -EINVAL;
1317                                 goto cleanup;
1318                         }
1319                         err = parse_stat_value(next, specs->ids[col], st);
1320                         if (err)
1321                                 goto cleanup;
1322                         col++;
1323                 }
1324
1325                 if (header) {
1326                         header = false;
1327                         continue;
1328                 }
1329
1330                 if (col < specs->spec_cnt) {
1331                         fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
1332                                 *stat_cntp, filename);
1333                         err = -EINVAL;
1334                         goto cleanup;
1335                 }
1336
1337                 if (!st->file_name || !st->prog_name) {
1338                         fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
1339                                 *stat_cntp, filename);
1340                         err = -EINVAL;
1341                         goto cleanup;
1342                 }
1343
1344                 /* in comparison mode we can only check filters after we
1345                  * parsed entire line; if row should be ignored we pretend we
1346                  * never parsed it
1347                  */
1348                 if (!should_process_file_prog(st->file_name, st->prog_name)) {
1349                         free(st->file_name);
1350                         free(st->prog_name);
1351                         *stat_cntp -= 1;
1352                 }
1353         }
1354
1355         if (!feof(f)) {
1356                 err = -errno;
1357                 fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
1358         }
1359
1360 cleanup:
1361         fclose(f);
1362         return err;
1363 }
1364
1365 /* empty/zero stats for mismatched rows */
1366 static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
1367
1368 static bool is_key_stat(enum stat_id id)
1369 {
1370         return id == FILE_NAME || id == PROG_NAME;
1371 }
1372
1373 static void output_comp_header_underlines(void)
1374 {
1375         int i, j, k;
1376
1377         for (i = 0; i < env.output_spec.spec_cnt; i++) {
1378                 int id = env.output_spec.ids[i];
1379                 int max_j = is_key_stat(id) ? 1 : 3;
1380
1381                 for (j = 0; j < max_j; j++) {
1382                         int len = env.output_spec.lens[3 * i + j];
1383
1384                         printf("%s", i + j == 0 ? "" : COLUMN_SEP);
1385
1386                         for (k = 0; k < len; k++)
1387                                 printf("%c", HEADER_CHAR);
1388                 }
1389         }
1390         printf("\n");
1391 }
1392
1393 static void output_comp_headers(enum resfmt fmt)
1394 {
1395         static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
1396         static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
1397         int i, j, len;
1398
1399         for (i = 0; i < env.output_spec.spec_cnt; i++) {
1400                 int id = env.output_spec.ids[i];
1401                 /* key stats don't have A/B/DIFF columns, they are common for both data sets */
1402                 int max_j = is_key_stat(id) ? 1 : 3;
1403
1404                 for (j = 0; j < max_j; j++) {
1405                         int *max_len = &env.output_spec.lens[3 * i + j];
1406                         bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
1407                         const char *sfx;
1408
1409                         switch (fmt) {
1410                         case RESFMT_TABLE_CALCLEN:
1411                                 sfx = is_key_stat(id) ? "" : table_sfxs[j];
1412                                 len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
1413                                 if (len > *max_len)
1414                                         *max_len = len;
1415                                 break;
1416                         case RESFMT_TABLE:
1417                                 sfx = is_key_stat(id) ? "" : table_sfxs[j];
1418                                 printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
1419                                        *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
1420                                 if (last)
1421                                         printf("\n");
1422                                 break;
1423                         case RESFMT_CSV:
1424                                 sfx = is_key_stat(id) ? "" : name_sfxs[j];
1425                                 printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
1426                                 if (last)
1427                                         printf("\n");
1428                                 break;
1429                         }
1430                 }
1431         }
1432
1433         if (fmt == RESFMT_TABLE)
1434                 output_comp_header_underlines();
1435 }
1436
1437 static void output_comp_stats(const struct verif_stats_join *join_stats,
1438                               enum resfmt fmt, bool last)
1439 {
1440         const struct verif_stats *base = join_stats->stats_a;
1441         const struct verif_stats *comp = join_stats->stats_b;
1442         char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
1443         int i;
1444
1445         for (i = 0; i < env.output_spec.spec_cnt; i++) {
1446                 int id = env.output_spec.ids[i], len;
1447                 int *max_len_base = &env.output_spec.lens[3 * i + 0];
1448                 int *max_len_comp = &env.output_spec.lens[3 * i + 1];
1449                 int *max_len_diff = &env.output_spec.lens[3 * i + 2];
1450                 const char *base_str = NULL, *comp_str = NULL;
1451                 long base_val = 0, comp_val = 0, diff_val = 0;
1452
1453                 prepare_value(base, id, &base_str, &base_val);
1454                 prepare_value(comp, id, &comp_str, &comp_val);
1455
1456                 /* normalize all the outputs to be in string buffers for simplicity */
1457                 if (is_key_stat(id)) {
1458                         /* key stats (file and program name) are always strings */
1459                         if (base)
1460                                 snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1461                         else
1462                                 snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
1463                 } else if (base_str) {
1464                         snprintf(base_buf, sizeof(base_buf), "%s", base_str);
1465                         snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
1466                         if (!base || !comp)
1467                                 snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1468                         else if (strcmp(base_str, comp_str) == 0)
1469                                 snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
1470                         else
1471                                 snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
1472                 } else {
1473                         double p = 0.0;
1474
1475                         if (base)
1476                                 snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
1477                         else
1478                                 snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
1479                         if (comp)
1480                                 snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
1481                         else
1482                                 snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
1483
1484                         diff_val = comp_val - base_val;
1485                         if (!base || !comp) {
1486                                 snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
1487                         } else {
1488                                 if (base_val == 0) {
1489                                         if (comp_val == base_val)
1490                                                 p = 0.0; /* avoid +0 (+100%) case */
1491                                         else
1492                                                 p = comp_val < base_val ? -100.0 : 100.0;
1493                                 } else {
1494                                          p = diff_val * 100.0 / base_val;
1495                                 }
1496                                 snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
1497                         }
1498                 }
1499
1500                 switch (fmt) {
1501                 case RESFMT_TABLE_CALCLEN:
1502                         len = strlen(base_buf);
1503                         if (len > *max_len_base)
1504                                 *max_len_base = len;
1505                         if (!is_key_stat(id)) {
1506                                 len = strlen(comp_buf);
1507                                 if (len > *max_len_comp)
1508                                         *max_len_comp = len;
1509                                 len = strlen(diff_buf);
1510                                 if (len > *max_len_diff)
1511                                         *max_len_diff = len;
1512                         }
1513                         break;
1514                 case RESFMT_TABLE: {
1515                         /* string outputs are left-aligned, number outputs are right-aligned */
1516                         const char *fmt = base_str ? "%s%-*s" : "%s%*s";
1517
1518                         printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
1519                         if (!is_key_stat(id)) {
1520                                 printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
1521                                 printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
1522                         }
1523                         if (i == env.output_spec.spec_cnt - 1)
1524                                 printf("\n");
1525                         break;
1526                 }
1527                 case RESFMT_CSV:
1528                         printf("%s%s", i == 0 ? "" : ",", base_buf);
1529                         if (!is_key_stat(id)) {
1530                                 printf("%s%s", i == 0 ? "" : ",", comp_buf);
1531                                 printf("%s%s", i == 0 ? "" : ",", diff_buf);
1532                         }
1533                         if (i == env.output_spec.spec_cnt - 1)
1534                                 printf("\n");
1535                         break;
1536                 }
1537         }
1538
1539         if (last && fmt == RESFMT_TABLE)
1540                 output_comp_header_underlines();
1541 }
1542
1543 static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
1544 {
1545         int r;
1546
1547         r = strcmp(base->file_name, comp->file_name);
1548         if (r != 0)
1549                 return r;
1550         return strcmp(base->prog_name, comp->prog_name);
1551 }
1552
1553 static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
1554 {
1555         static const double eps = 1e-9;
1556         const char *str = NULL;
1557         double value = 0.0;
1558
1559         fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
1560
1561         switch (f->op) {
1562         case OP_EQ: return value > f->value - eps && value < f->value + eps;
1563         case OP_NEQ: return value < f->value - eps || value > f->value + eps;
1564         case OP_LT: return value < f->value - eps;
1565         case OP_LE: return value <= f->value + eps;
1566         case OP_GT: return value > f->value + eps;
1567         case OP_GE: return value >= f->value - eps;
1568         }
1569
1570         fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1571         return false;
1572 }
1573
1574 static bool should_output_join_stats(const struct verif_stats_join *stats)
1575 {
1576         struct filter *f;
1577         int i, allow_cnt = 0;
1578
1579         for (i = 0; i < env.deny_filter_cnt; i++) {
1580                 f = &env.deny_filters[i];
1581                 if (f->kind != FILTER_STAT)
1582                         continue;
1583
1584                 if (is_join_stat_filter_matched(f, stats))
1585                         return false;
1586         }
1587
1588         for (i = 0; i < env.allow_filter_cnt; i++) {
1589                 f = &env.allow_filters[i];
1590                 if (f->kind != FILTER_STAT)
1591                         continue;
1592                 allow_cnt++;
1593
1594                 if (is_join_stat_filter_matched(f, stats))
1595                         return true;
1596         }
1597
1598         /* if there are no stat allowed filters, pass everything through */
1599         return allow_cnt == 0;
1600 }
1601
1602 static int handle_comparison_mode(void)
1603 {
1604         struct stat_specs base_specs = {}, comp_specs = {};
1605         struct stat_specs tmp_sort_spec;
1606         enum resfmt cur_fmt;
1607         int err, i, j, last_idx;
1608
1609         if (env.filename_cnt != 2) {
1610                 fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
1611                 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1612                 return -EINVAL;
1613         }
1614
1615         err = parse_stats_csv(env.filenames[0], &base_specs,
1616                               &env.baseline_stats, &env.baseline_stat_cnt);
1617         if (err) {
1618                 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1619                 return err;
1620         }
1621         err = parse_stats_csv(env.filenames[1], &comp_specs,
1622                               &env.prog_stats, &env.prog_stat_cnt);
1623         if (err) {
1624                 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
1625                 return err;
1626         }
1627
1628         /* To keep it simple we validate that the set and order of stats in
1629          * both CSVs are exactly the same. This can be lifted with a bit more
1630          * pre-processing later.
1631          */
1632         if (base_specs.spec_cnt != comp_specs.spec_cnt) {
1633                 fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
1634                         env.filenames[0], env.filenames[1],
1635                         base_specs.spec_cnt, comp_specs.spec_cnt);
1636                 return -EINVAL;
1637         }
1638         for (i = 0; i < base_specs.spec_cnt; i++) {
1639                 if (base_specs.ids[i] != comp_specs.ids[i]) {
1640                         fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
1641                                 env.filenames[0], env.filenames[1],
1642                                 stat_defs[base_specs.ids[i]].names[0],
1643                                 stat_defs[comp_specs.ids[i]].names[0]);
1644                         return -EINVAL;
1645                 }
1646         }
1647
1648         /* Replace user-specified sorting spec with file+prog sorting rule to
1649          * be able to join two datasets correctly. Once we are done, we will
1650          * restore the original sort spec.
1651          */
1652         tmp_sort_spec = env.sort_spec;
1653         env.sort_spec = join_sort_spec;
1654         qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1655         qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
1656         env.sort_spec = tmp_sort_spec;
1657
1658         /* Join two datasets together. If baseline and comparison datasets
1659          * have different subset of rows (we match by 'object + prog' as
1660          * a unique key) then assume empty/missing/zero value for rows that
1661          * are missing in the opposite data set.
1662          */
1663         i = j = 0;
1664         while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
1665                 const struct verif_stats *base, *comp;
1666                 struct verif_stats_join *join;
1667                 void *tmp;
1668                 int r;
1669
1670                 base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
1671                 comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
1672
1673                 if (!base->file_name || !base->prog_name) {
1674                         fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1675                                 i, env.filenames[0]);
1676                         return -EINVAL;
1677                 }
1678                 if (!comp->file_name || !comp->prog_name) {
1679                         fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
1680                                 j, env.filenames[1]);
1681                         return -EINVAL;
1682                 }
1683
1684                 tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
1685                 if (!tmp)
1686                         return -ENOMEM;
1687                 env.join_stats = tmp;
1688
1689                 join = &env.join_stats[env.join_stat_cnt];
1690                 memset(join, 0, sizeof(*join));
1691
1692                 r = cmp_stats_key(base, comp);
1693                 if (r == 0) {
1694                         join->file_name = base->file_name;
1695                         join->prog_name = base->prog_name;
1696                         join->stats_a = base;
1697                         join->stats_b = comp;
1698                         i++;
1699                         j++;
1700                 } else if (comp == &fallback_stats || r < 0) {
1701                         join->file_name = base->file_name;
1702                         join->prog_name = base->prog_name;
1703                         join->stats_a = base;
1704                         join->stats_b = NULL;
1705                         i++;
1706                 } else {
1707                         join->file_name = comp->file_name;
1708                         join->prog_name = comp->prog_name;
1709                         join->stats_a = NULL;
1710                         join->stats_b = comp;
1711                         j++;
1712                 }
1713                 env.join_stat_cnt += 1;
1714         }
1715
1716         /* now sort joined results accorsing to sort spec */
1717         qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
1718
1719         /* for human-readable table output we need to do extra pass to
1720          * calculate column widths, so we substitute current output format
1721          * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
1722          * and do everything again.
1723          */
1724         if (env.out_fmt == RESFMT_TABLE)
1725                 cur_fmt = RESFMT_TABLE_CALCLEN;
1726         else
1727                 cur_fmt = env.out_fmt;
1728
1729 one_more_time:
1730         output_comp_headers(cur_fmt);
1731
1732         for (i = 0; i < env.join_stat_cnt; i++) {
1733                 const struct verif_stats_join *join = &env.join_stats[i];
1734
1735                 if (!should_output_join_stats(join))
1736                         continue;
1737
1738                 if (cur_fmt == RESFMT_TABLE_CALCLEN)
1739                         last_idx = i;
1740
1741                 output_comp_stats(join, cur_fmt, i == last_idx);
1742         }
1743
1744         if (cur_fmt == RESFMT_TABLE_CALCLEN) {
1745                 cur_fmt = RESFMT_TABLE;
1746                 goto one_more_time; /* ... this time with feeling */
1747         }
1748
1749         return 0;
1750 }
1751
1752 static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
1753 {
1754         long value = stats->stats[f->stat_id];
1755
1756         switch (f->op) {
1757         case OP_EQ: return value == f->value;
1758         case OP_NEQ: return value != f->value;
1759         case OP_LT: return value < f->value;
1760         case OP_LE: return value <= f->value;
1761         case OP_GT: return value > f->value;
1762         case OP_GE: return value >= f->value;
1763         }
1764
1765         fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
1766         return false;
1767 }
1768
1769 static bool should_output_stats(const struct verif_stats *stats)
1770 {
1771         struct filter *f;
1772         int i, allow_cnt = 0;
1773
1774         for (i = 0; i < env.deny_filter_cnt; i++) {
1775                 f = &env.deny_filters[i];
1776                 if (f->kind != FILTER_STAT)
1777                         continue;
1778
1779                 if (is_stat_filter_matched(f, stats))
1780                         return false;
1781         }
1782
1783         for (i = 0; i < env.allow_filter_cnt; i++) {
1784                 f = &env.allow_filters[i];
1785                 if (f->kind != FILTER_STAT)
1786                         continue;
1787                 allow_cnt++;
1788
1789                 if (is_stat_filter_matched(f, stats))
1790                         return true;
1791         }
1792
1793         /* if there are no stat allowed filters, pass everything through */
1794         return allow_cnt == 0;
1795 }
1796
1797 static void output_prog_stats(void)
1798 {
1799         const struct verif_stats *stats;
1800         int i, last_stat_idx = 0;
1801
1802         if (env.out_fmt == RESFMT_TABLE) {
1803                 /* calculate column widths */
1804                 output_headers(RESFMT_TABLE_CALCLEN);
1805                 for (i = 0; i < env.prog_stat_cnt; i++) {
1806                         stats = &env.prog_stats[i];
1807                         if (!should_output_stats(stats))
1808                                 continue;
1809                         output_stats(stats, RESFMT_TABLE_CALCLEN, false);
1810                         last_stat_idx = i;
1811                 }
1812         }
1813
1814         /* actually output the table */
1815         output_headers(env.out_fmt);
1816         for (i = 0; i < env.prog_stat_cnt; i++) {
1817                 stats = &env.prog_stats[i];
1818                 if (!should_output_stats(stats))
1819                         continue;
1820                 output_stats(stats, env.out_fmt, i == last_stat_idx);
1821         }
1822 }
1823
1824 static int handle_verif_mode(void)
1825 {
1826         int i, err;
1827
1828         if (env.filename_cnt == 0) {
1829                 fprintf(stderr, "Please provide path to BPF object file!\n\n");
1830                 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1831                 return -EINVAL;
1832         }
1833
1834         for (i = 0; i < env.filename_cnt; i++) {
1835                 err = process_obj(env.filenames[i]);
1836                 if (err) {
1837                         fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
1838                         return err;
1839                 }
1840         }
1841
1842         qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1843
1844         output_prog_stats();
1845
1846         return 0;
1847 }
1848
1849 static int handle_replay_mode(void)
1850 {
1851         struct stat_specs specs = {};
1852         int err;
1853
1854         if (env.filename_cnt != 1) {
1855                 fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
1856                 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1857                 return -EINVAL;
1858         }
1859
1860         err = parse_stats_csv(env.filenames[0], &specs,
1861                               &env.prog_stats, &env.prog_stat_cnt);
1862         if (err) {
1863                 fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
1864                 return err;
1865         }
1866
1867         qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
1868
1869         output_prog_stats();
1870
1871         return 0;
1872 }
1873
1874 int main(int argc, char **argv)
1875 {
1876         int err = 0, i;
1877
1878         if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
1879                 return 1;
1880
1881         if (env.verbose && env.quiet) {
1882                 fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
1883                 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1884                 return 1;
1885         }
1886         if (env.verbose && env.log_level == 0)
1887                 env.log_level = 1;
1888
1889         if (env.output_spec.spec_cnt == 0) {
1890                 if (env.out_fmt == RESFMT_CSV)
1891                         env.output_spec = default_csv_output_spec;
1892                 else
1893                         env.output_spec = default_output_spec;
1894         }
1895         if (env.sort_spec.spec_cnt == 0)
1896                 env.sort_spec = default_sort_spec;
1897
1898         if (env.comparison_mode && env.replay_mode) {
1899                 fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
1900                 argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
1901                 return 1;
1902         }
1903
1904         if (env.comparison_mode)
1905                 err = handle_comparison_mode();
1906         else if (env.replay_mode)
1907                 err = handle_replay_mode();
1908         else
1909                 err = handle_verif_mode();
1910
1911         free_verif_stats(env.prog_stats, env.prog_stat_cnt);
1912         free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
1913         free(env.join_stats);
1914         for (i = 0; i < env.filename_cnt; i++)
1915                 free(env.filenames[i]);
1916         free(env.filenames);
1917         for (i = 0; i < env.allow_filter_cnt; i++) {
1918                 free(env.allow_filters[i].any_glob);
1919                 free(env.allow_filters[i].file_glob);
1920                 free(env.allow_filters[i].prog_glob);
1921         }
1922         free(env.allow_filters);
1923         for (i = 0; i < env.deny_filter_cnt; i++) {
1924                 free(env.deny_filters[i].any_glob);
1925                 free(env.deny_filters[i].file_glob);
1926                 free(env.deny_filters[i].prog_glob);
1927         }
1928         free(env.deny_filters);
1929         return -err;
1930 }