Merge remote-tracking branch 'torvalds/master' into perf-tools-next
author Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 4 Aug 2023 13:06:38 +0000 (10:06 -0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Fri, 4 Aug 2023 13:06:38 +0000 (10:06 -0300)
To pick up the fixes that were just merged from perf-tools/perf-tools
for v6.5.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
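
For reference, a back-merge like this is typically produced with something along
the lines of the commands below (the remote-tracking branch and target branch come
from the subject line; the remote being named 'torvalds' is an assumption):

  git checkout perf-tools-next
  git fetch torvalds                 # assumed remote pointing at the mainline tree
  git merge torvalds/master          # picks up the perf-tools fixes merged for v6.5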
99 files changed:
Documentation/admin-guide/perf/alibaba_pmu.rst
MAINTAINERS
tools/build/Makefile.build
tools/build/feature/Makefile
tools/perf/Documentation/perf-bench.txt
tools/perf/Documentation/perf.data-file-format.txt
tools/perf/Makefile.config
tools/perf/Makefile.perf
tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
tools/perf/arch/x86/util/evlist.c
tools/perf/arch/x86/util/evsel.c
tools/perf/bench/Build
tools/perf/bench/bench.h
tools/perf/bench/uprobe.c [new file with mode: 0644]
tools/perf/builtin-bench.c
tools/perf/builtin-diff.c
tools/perf/builtin-trace.c
tools/perf/check-headers.sh
tools/perf/pmu-events/Build
tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json [new file with mode: 0644]
tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json [new file with mode: 0644]
tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
tools/perf/pmu-events/arch/x86/mapfile.csv
tools/perf/pmu-events/arch/x86/meteorlake/cache.json
tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
tools/perf/pmu-events/arch/x86/meteorlake/memory.json
tools/perf/pmu-events/arch/x86/meteorlake/other.json
tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json
tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json
tools/perf/pmu-events/arch/x86/sapphirerapids/other.json
tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json
tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json
tools/perf/pmu-events/jevents.py
tools/perf/scripts/python/Perf-Trace-Util/Build
tools/perf/scripts/python/bin/gecko-record [new file with mode: 0644]
tools/perf/scripts/python/bin/gecko-report [new file with mode: 0644]
tools/perf/scripts/python/gecko.py [new file with mode: 0644]
tools/perf/tests/bpf.c
tools/perf/tests/builtin-test.c
tools/perf/tests/config-fragments/README [new file with mode: 0644]
tools/perf/tests/config-fragments/arm64 [new file with mode: 0644]
tools/perf/tests/config-fragments/config [new file with mode: 0644]
tools/perf/tests/shell/coresight/asm_pure_loop.sh
tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh
tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh
tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh
tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh
tools/perf/tests/shell/lib/probe.sh
tools/perf/tests/shell/lib/probe_vfs_getname.sh
tools/perf/tests/shell/lib/stat_output.sh
tools/perf/tests/shell/lib/waiting.sh
tools/perf/tests/shell/lock_contention.sh
tools/perf/tests/shell/probe_vfs_getname.sh
tools/perf/tests/shell/record+zstd_comp_decomp.sh
tools/perf/tests/shell/record_offcpu.sh
tools/perf/tests/shell/stat+csv_output.sh
tools/perf/tests/shell/stat+csv_summary.sh
tools/perf/tests/shell/stat+shadow_stat.sh
tools/perf/tests/shell/stat+std_output.sh
tools/perf/tests/shell/stat_bpf_counters.sh
tools/perf/tests/shell/stat_bpf_counters_cgrp.sh
tools/perf/tests/shell/test_arm_spe_fork.sh
tools/perf/tests/shell/test_perf_data_converter_json.sh
tools/perf/tests/shell/test_task_analyzer.sh
tools/perf/tests/shell/trace+probe_vfs_getname.sh
tools/perf/tests/stat.c
tools/perf/trace/beauty/arch_errno_names.sh
tools/perf/trace/beauty/x86_arch_prctl.sh
tools/perf/ui/browsers/hists.c
tools/perf/util/Build
tools/perf/util/annotate.c
tools/perf/util/bpf-filter.c
tools/perf/util/bpf-filter.y
tools/perf/util/bpf-loader.c
tools/perf/util/bpf_skel/bench_uprobe.bpf.c [new file with mode: 0644]
tools/perf/util/c++/Build
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evsel.c
tools/perf/util/expr.c
tools/perf/util/expr.y
tools/perf/util/machine.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.h
tools/perf/util/parse-events.l
tools/perf/util/parse-events.y
tools/perf/util/pmu.c
tools/perf/util/pmu.y
tools/perf/util/probe-event.c
tools/perf/util/scripting-engines/Build
tools/perf/util/setup.py
tools/perf/util/stat.c
tools/perf/util/thread.c
tools/perf/util/thread.h
tools/scripts/utilities.mak

index 11de998..7d84002 100644 (file)
@@ -88,6 +88,11 @@ data bandwidth::
     -e ali_drw_27080/hif_rmw/ \
     -e ali_drw_27080/cycle/ -- sleep 10
 
+Example usage of counting all memory read/write bandwidth by metric::
+
+  perf stat -M ddr_read_bandwidth.all -- sleep 10
+  perf stat -M ddr_write_bandwidth.all -- sleep 10
+
 The average DRAM bandwidth can be calculated as follows:
 
 - Read Bandwidth =  perf_hif_rd * DDRC_WIDTH * DDRC_Freq / DDRC_Cycle
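
As a rough, purely hypothetical sanity check of the new metric: the hif_* events
count 64-byte operations, and ddr_read_bandwidth.all (see the yitian710
metrics.json added further down) is defined as hif_rd * 64 / 1e6 / duration_time,
so 1,000,000,000 hif_rd operations during a 10 second "sleep 10" correspond to
1e9 * 64 / 1e6 / 10 = 6400 MB/s of read bandwidth.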
index 92d23da..a06df3b 100644 (file)
@@ -16626,6 +16626,8 @@ L:      linux-kernel@vger.kernel.org
 S:     Supported
 W:     https://perf.wiki.kernel.org/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git perf/core
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools.git perf-tools
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools-next.git perf-tools-next
 F:     arch/*/events/*
 F:     arch/*/events/*/*
 F:     arch/*/include/asm/perf_event.h
index 8943033..fac4248 100644 (file)
@@ -117,6 +117,16 @@ $(OUTPUT)%.s: %.c FORCE
        $(call rule_mkdir)
        $(call if_changed_dep,cc_s_c)
 
+# bison and flex files are generated in the OUTPUT directory
+# so it needs a separate rule to depend on them properly
+$(OUTPUT)%-bison.o: $(OUTPUT)%-bison.c FORCE
+       $(call rule_mkdir)
+       $(call if_changed_dep,$(host)cc_o_c)
+
+$(OUTPUT)%-flex.o: $(OUTPUT)%-flex.c FORCE
+       $(call rule_mkdir)
+       $(call if_changed_dep,$(host)cc_o_c)
+
 # Gather build data:
 #   obj-y        - list of build objects
 #   subdir-y     - list of directories to nest
index 2cd6dbb..3184f38 100644 (file)
@@ -340,7 +340,7 @@ $(OUTPUT)test-jvmti-cmlr.bin:
        $(BUILD)
 
 $(OUTPUT)test-llvm.bin:
-       $(BUILDXX) -std=gnu++14                                 \
+       $(BUILDXX) -std=gnu++17                                 \
                -I$(shell $(LLVM_CONFIG) --includedir)          \
                -L$(shell $(LLVM_CONFIG) --libdir)              \
                $(shell $(LLVM_CONFIG) --libs Core BPF)         \
@@ -348,17 +348,15 @@ $(OUTPUT)test-llvm.bin:
                > $(@:.bin=.make.output) 2>&1
 
 $(OUTPUT)test-llvm-version.bin:
-       $(BUILDXX) -std=gnu++14                                 \
+       $(BUILDXX) -std=gnu++17                                 \
                -I$(shell $(LLVM_CONFIG) --includedir)          \
                > $(@:.bin=.make.output) 2>&1
 
 $(OUTPUT)test-clang.bin:
-       $(BUILDXX) -std=gnu++14                                 \
+       $(BUILDXX) -std=gnu++17                                 \
                -I$(shell $(LLVM_CONFIG) --includedir)          \
                -L$(shell $(LLVM_CONFIG) --libdir)              \
-               -Wl,--start-group -lclangBasic -lclangDriver    \
-                 -lclangFrontend -lclangEdit -lclangLex        \
-                 -lclangAST -Wl,--end-group                    \
+               -Wl,--start-group -lclang-cpp -Wl,--end-group   \
                $(shell $(LLVM_CONFIG) --libs Core option)      \
                $(shell $(LLVM_CONFIG) --system-libs)           \
                > $(@:.bin=.make.output) 2>&1
index f04f0ea..ca57896 100644 (file)
@@ -67,6 +67,9 @@ SUBSYSTEM
 'internals'::
        Benchmark internal perf functionality.
 
+'uprobe'::
+       Benchmark overhead of uprobe + BPF.
+
 'all'::
        All benchmark subsystems.
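
A hedged usage sketch for the new subsystem, with the benchmark names taken from
the uprobe_benchmarks table added to builtin-bench.c below:

  perf bench uprobe baseline       # plain usleep(1000) loop, no BPF attached
  perf bench uprobe empty          # empty BPF prog attached to the usleep uprobe
  perf bench uprobe trace_printk   # BPF prog doing a trace_printk per uprobe hit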
 
index 635ba04..010a4ed 100644 (file)
@@ -43,7 +43,7 @@ struct perf_file_section {
 
 Flags section:
 
-For each of the optional features a perf_file_section it placed after the data
+For each of the optional features a perf_file_section is placed after the data
 section if the feature bit is set in the perf_header flags bitset. The
 respective perf_file_section points to the data of the additional header and
 defines its size.
index c5db0de..1bf8dc5 100644 (file)
@@ -246,6 +246,9 @@ ifeq ($(CC_NO_CLANG), 0)
 else
   CORE_CFLAGS += -O6
 endif
+else
+  CORE_CFLAGS += -g
+  CXXFLAGS += -g
 endif
 
 ifdef PARSER_DEBUG
@@ -256,6 +259,11 @@ ifdef PARSER_DEBUG
   $(call detected_var,PARSER_DEBUG_FLEX)
 endif
 
+ifdef LTO
+  CORE_CFLAGS += -flto
+  CXXFLAGS += -flto
+endif
+
 # Try different combinations to accommodate systems that only have
 # python[2][3]-config in weird combinations in the following order of
 # priority from lowest to highest:
@@ -319,18 +327,14 @@ FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
 FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
 
 CORE_CFLAGS += -fno-omit-frame-pointer
-CORE_CFLAGS += -ggdb3
-CORE_CFLAGS += -funwind-tables
 CORE_CFLAGS += -Wall
 CORE_CFLAGS += -Wextra
 CORE_CFLAGS += -std=gnu11
 
-CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti
+CXXFLAGS += -std=gnu++17 -fno-exceptions -fno-rtti
 CXXFLAGS += -Wall
+CXXFLAGS += -Wextra
 CXXFLAGS += -fno-omit-frame-pointer
-CXXFLAGS += -ggdb3
-CXXFLAGS += -funwind-tables
-CXXFLAGS += -Wno-strict-aliasing
 
 HOSTCFLAGS += -Wall
 HOSTCFLAGS += -Wextra
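
The new LTO block above keys off a plain make variable, so it can be enabled from
the command line; a minimal sketch (the LTO=1 spelling is the only assumption
beyond the hunk itself):

  make -C tools/perf LTO=1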
index 097316e..0ed7ee0 100644 (file)
@@ -426,10 +426,7 @@ EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS))
 LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
 
 ifeq ($(USE_CLANG), 1)
-  CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization
-  CLANGLIBS_NOEXT_LIST = $(foreach l,$(CLANGLIBS_LIST),$(shell $(LLVM_CONFIG) --libdir)/libclang$(l))
-  LIBCLANG = $(foreach l,$(CLANGLIBS_NOEXT_LIST),$(wildcard $(l).a $(l).so))
-  LIBS += -Wl,--start-group $(LIBCLANG) -Wl,--end-group
+  LIBS += -L$(shell $(LLVM_CONFIG) --libdir) -lclang-cpp
 endif
 
 ifeq ($(USE_LLVM), 1)
@@ -1057,6 +1054,7 @@ SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
 SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
 SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h $(SKEL_OUT)/sample_filter.skel.h
+SKELETONS += $(SKEL_OUT)/bench_uprobe.skel.h
 
 $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT):
        $(Q)$(MKDIR) -p $@
index fa526a9..59d7914 100755 (executable)
@@ -24,7 +24,7 @@ sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
 grep '^[0-9]' "$in" | sort -n > $sorted_table
 
 max_nr=0
-while read nr abi name entry compat; do
+while read nr _abi name entry _compat; do
     if [ $nr -ge 512 ] ; then # discard compat sycalls
         break
     fi
index cbd5821..b1ce0c5 100644 (file)
@@ -75,11 +75,12 @@ int arch_evlist__add_default_attrs(struct evlist *evlist,
 
 int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
 {
-       if (topdown_sys_has_perf_metrics() && evsel__sys_has_perf_metrics(lhs)) {
+       if (topdown_sys_has_perf_metrics() &&
+           (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) {
                /* Ensure the topdown slots comes first. */
-               if (strcasestr(lhs->name, "slots"))
+               if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots"))
                        return -1;
-               if (strcasestr(rhs->name, "slots"))
+               if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots"))
                        return 1;
                /* Followed by topdown events. */
                if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown"))
index 81d2265..090d0f3 100644 (file)
@@ -40,12 +40,11 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel)
 
 bool arch_evsel__must_be_in_group(const struct evsel *evsel)
 {
-       if (!evsel__sys_has_perf_metrics(evsel))
+       if (!evsel__sys_has_perf_metrics(evsel) || !evsel->name ||
+           strcasestr(evsel->name, "uops_retired.slots"))
                return false;
 
-       return evsel->name &&
-               (strcasestr(evsel->name, "slots") ||
-                strcasestr(evsel->name, "topdown"));
+       return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots");
 }
 
 int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
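
The sorting above exists because, on CPUs with perf metrics, the topdown events
must be grouped behind the slots event; a hedged example of the kind of group this
ordering keeps valid (event names as used in the kernel's topdown documentation,
availability depends on the CPU):

  perf stat -e '{slots,topdown-retiring,topdown-bad-spec,topdown-fe-bound,topdown-be-bound}' -a sleep 1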
index 0f158dc..47412d4 100644 (file)
@@ -16,6 +16,7 @@ perf-y += inject-buildid.o
 perf-y += evlist-open-close.o
 perf-y += breakpoint.o
 perf-y += pmu-scan.o
+perf-y += uprobe.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
index 0d2b659..50de477 100644 (file)
@@ -42,6 +42,9 @@ int bench_inject_build_id(int argc, const char **argv);
 int bench_evlist_open_close(int argc, const char **argv);
 int bench_breakpoint_thread(int argc, const char **argv);
 int bench_breakpoint_enable(int argc, const char **argv);
+int bench_uprobe_baseline(int argc, const char **argv);
+int bench_uprobe_empty(int argc, const char **argv);
+int bench_uprobe_trace_printk(int argc, const char **argv);
 int bench_pmu_scan(int argc, const char **argv);
 
 #define BENCH_FORMAT_DEFAULT_STR       "default"
diff --git a/tools/perf/bench/uprobe.c b/tools/perf/bench/uprobe.c
new file mode 100644 (file)
index 0000000..914c081
--- /dev/null
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * uprobe.c
+ *
+ * uprobe benchmarks
+ *
+ *  Copyright (C) 2023, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+#include <linux/compiler.h>
+#include <linux/time64.h>
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#define LOOPS_DEFAULT 1000
+static int loops = LOOPS_DEFAULT;
+
+enum bench_uprobe {
+        BENCH_UPROBE__BASELINE,
+        BENCH_UPROBE__EMPTY,
+        BENCH_UPROBE__TRACE_PRINTK,
+};
+
+static const struct option options[] = {
+       OPT_INTEGER('l', "loop",        &loops,         "Specify number of loops"),
+       OPT_END()
+};
+
+static const char * const bench_uprobe_usage[] = {
+       "perf bench uprobe <options>",
+       NULL
+};
+
+#ifdef HAVE_BPF_SKEL
+#include "bpf_skel/bench_uprobe.skel.h"
+
+#define bench_uprobe__attach_uprobe(prog) \
+       skel->links.prog = bpf_program__attach_uprobe_opts(/*prog=*/skel->progs.prog, \
+                                                          /*pid=*/-1, \
+                                                          /*binary_path=*/"/lib64/libc.so.6", \
+                                                          /*func_offset=*/0, \
+                                                          /*opts=*/&uprobe_opts); \
+       if (!skel->links.prog) { \
+               err = -errno; \
+               fprintf(stderr, "Failed to attach bench uprobe \"%s\": %s\n", #prog, strerror(errno)); \
+               goto cleanup; \
+       }
+
+struct bench_uprobe_bpf *skel;
+
+static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench)
+{
+       DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+       int err;
+
+       /* Load and verify BPF application */
+       skel = bench_uprobe_bpf__open();
+       if (!skel) {
+               fprintf(stderr, "Failed to open and load uprobes bench BPF skeleton\n");
+               return -1;
+       }
+
+       err = bench_uprobe_bpf__load(skel);
+       if (err) {
+               fprintf(stderr, "Failed to load and verify BPF skeleton\n");
+               goto cleanup;
+       }
+
+       uprobe_opts.func_name = "usleep";
+       switch (bench) {
+       case BENCH_UPROBE__BASELINE:                                                    break;
+       case BENCH_UPROBE__EMPTY:        bench_uprobe__attach_uprobe(empty);            break;
+       case BENCH_UPROBE__TRACE_PRINTK: bench_uprobe__attach_uprobe(trace_printk);     break;
+       default:
+               fprintf(stderr, "Invalid bench: %d\n", bench);
+               goto cleanup;
+       }
+
+       return err;
+cleanup:
+       bench_uprobe_bpf__destroy(skel);
+       return err;
+}
+
+static void bench_uprobe__teardown_bpf_skel(void)
+{
+       if (skel) {
+               bench_uprobe_bpf__destroy(skel);
+               skel = NULL;
+       }
+}
+#else
+static int bench_uprobe__setup_bpf_skel(enum bench_uprobe bench __maybe_unused) { return 0; }
+static void bench_uprobe__teardown_bpf_skel(void) {};
+#endif
+
+static int bench_uprobe_format__default_fprintf(const char *name, const char *unit, u64 diff, FILE *fp)
+{
+       static u64 baseline, previous;
+       s64 diff_to_baseline = diff - baseline,
+           diff_to_previous = diff - previous;
+       int printed = fprintf(fp, "# Executed %'d %s calls\n", loops, name);
+
+       printed += fprintf(fp, " %14s: %'" PRIu64 " %ss", "Total time", diff, unit);
+
+       if (baseline) {
+               printed += fprintf(fp, " %s%'" PRId64 " to baseline", diff_to_baseline > 0 ? "+" : "", diff_to_baseline);
+
+               if (previous != baseline)
+                       fprintf(stdout, " %s%'" PRId64 " to previous", diff_to_previous > 0 ? "+" : "", diff_to_previous);
+       }
+
+       printed += fprintf(fp, "\n\n %'.3f %ss/op", (double)diff / (double)loops, unit);
+
+       if (baseline) {
+               printed += fprintf(fp, " %'.3f %ss/op to baseline", (double)diff_to_baseline / (double)loops, unit);
+
+               if (previous != baseline)
+                       printed += fprintf(fp, " %'.3f %ss/op to previous", (double)diff_to_previous / (double)loops, unit);
+       } else {
+               baseline = diff;
+       }
+
+       fputc('\n', fp);
+
+       previous = diff;
+
+       return printed + 1;
+}
+
+static int bench_uprobe(int argc, const char **argv, enum bench_uprobe bench)
+{
+       const char *name = "usleep(1000)", *unit = "usec";
+       struct timespec start, end;
+       u64 diff;
+       int i;
+
+       argc = parse_options(argc, argv, options, bench_uprobe_usage, 0);
+
+       if (bench != BENCH_UPROBE__BASELINE && bench_uprobe__setup_bpf_skel(bench) < 0)
+               return 0;
+
+        clock_gettime(CLOCK_REALTIME, &start);
+
+       for (i = 0; i < loops; i++) {
+               usleep(USEC_PER_MSEC);
+       }
+
+       clock_gettime(CLOCK_REALTIME, &end);
+
+       diff = end.tv_sec * NSEC_PER_SEC + end.tv_nsec - (start.tv_sec * NSEC_PER_SEC + start.tv_nsec);
+       diff /= NSEC_PER_USEC;
+
+       switch (bench_format) {
+       case BENCH_FORMAT_DEFAULT:
+               bench_uprobe_format__default_fprintf(name, unit, diff, stdout);
+               break;
+
+       case BENCH_FORMAT_SIMPLE:
+               printf("%" PRIu64 "\n", diff);
+               break;
+
+       default:
+               /* reaching here is something of a disaster */
+               fprintf(stderr, "Unknown format:%d\n", bench_format);
+               exit(1);
+       }
+
+       if (bench != BENCH_UPROBE__BASELINE)
+               bench_uprobe__teardown_bpf_skel();
+
+       return 0;
+}
+
+int bench_uprobe_baseline(int argc, const char **argv)
+{
+       return bench_uprobe(argc, argv, BENCH_UPROBE__BASELINE);
+}
+
+int bench_uprobe_empty(int argc, const char **argv)
+{
+       return bench_uprobe(argc, argv, BENCH_UPROBE__EMPTY);
+}
+
+int bench_uprobe_trace_printk(int argc, const char **argv)
+{
+       return bench_uprobe(argc, argv, BENCH_UPROBE__TRACE_PRINTK);
+}
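
A hedged sketch of comparing the three variants (the names and the -l/--loop
option come straight from the code above; the "to baseline"/"to previous" columns
in the default output then show the uprobe + BPF overhead):

  for b in baseline empty trace_printk; do
          perf bench uprobe "$b" -l 1000
  done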
index db435b7..f60ccaf 100644 (file)
@@ -104,6 +104,13 @@ static struct bench breakpoint_benchmarks[] = {
        { NULL, NULL, NULL },
 };
 
+static struct bench uprobe_benchmarks[] = {
+       { "baseline",   "Baseline libc usleep(1000) call",                              bench_uprobe_baseline,  },
+       { "empty",      "Attach empty BPF prog to uprobe on usleep, system wide",       bench_uprobe_empty,     },
+       { "trace_printk", "Attach trace_printk BPF prog to uprobe on usleep syswide",   bench_uprobe_trace_printk,      },
+       { NULL, NULL, NULL },
+};
+
 struct collection {
        const char      *name;
        const char      *summary;
@@ -123,6 +130,7 @@ static struct collection collections[] = {
 #endif
        { "internals",  "Perf-internals benchmarks",                    internals_benchmarks    },
        { "breakpoint", "Breakpoint benchmarks",                        breakpoint_benchmarks   },
+       { "uprobe",     "uprobe benchmarks",                            uprobe_benchmarks       },
        { "all",        "All benchmarks",                               NULL                    },
        { NULL,         NULL,                                           NULL                    }
 };
index e8a1b16..57d300d 100644 (file)
@@ -1915,8 +1915,8 @@ static int data_init(int argc, const char **argv)
                struct perf_data *data = &d->data;
 
                data->path  = use_default ? defaults[i] : argv[i];
-               data->mode  = PERF_DATA_MODE_READ,
-               data->force = force,
+               data->mode  = PERF_DATA_MODE_READ;
+               data->force = force;
 
                d->idx  = i;
        }
index 6e73d0e..7ece252 100644 (file)
@@ -1296,6 +1296,22 @@ static struct thread_trace *thread_trace__new(void)
        return ttrace;
 }
 
+static void thread_trace__free_files(struct thread_trace *ttrace);
+
+static void thread_trace__delete(void *pttrace)
+{
+       struct thread_trace *ttrace = pttrace;
+
+       if (!ttrace)
+               return;
+
+       intlist__delete(ttrace->syscall_stats);
+       ttrace->syscall_stats = NULL;
+       thread_trace__free_files(ttrace);
+       zfree(&ttrace->entry_str);
+       free(ttrace);
+}
+
 static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
 {
        struct thread_trace *ttrace;
@@ -1333,6 +1349,17 @@ void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
 
 static const size_t trace__entry_str_size = 2048;
 
+static void thread_trace__free_files(struct thread_trace *ttrace)
+{
+       for (int i = 0; i < ttrace->files.max; ++i) {
+               struct file *file = ttrace->files.table + i;
+               zfree(&file->pathname);
+       }
+
+       zfree(&ttrace->files.table);
+       ttrace->files.max  = -1;
+}
+
 static struct file *thread_trace__files_entry(struct thread_trace *ttrace, int fd)
 {
        if (fd < 0)
@@ -1635,6 +1662,8 @@ static int trace__symbols_init(struct trace *trace, struct evlist *evlist)
        if (trace->host == NULL)
                return -ENOMEM;
 
+       thread__set_priv_destructor(thread_trace__delete);
+
        err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
        if (err < 0)
                goto out;
@@ -3136,13 +3165,8 @@ static void evlist__free_syscall_tp_fields(struct evlist *evlist)
        struct evsel *evsel;
 
        evlist__for_each_entry(evlist, evsel) {
-               struct evsel_trace *et = evsel->priv;
-
-               if (!et || !evsel->tp_format || strcmp(evsel->tp_format->system, "syscalls"))
-                       continue;
-
-               zfree(&et->fmt);
-               free(et);
+               evsel_trace__delete(evsel->priv);
+               evsel->priv = NULL;
        }
 }
 
index a0f1d8a..4314c91 100755 (executable)
@@ -123,7 +123,7 @@ check () {
 
   shift
 
-  check_2 "tools/$file" "$file" $*
+  check_2 "tools/$file" "$file" "$@"
 }
 
 beauty_check () {
@@ -131,7 +131,7 @@ beauty_check () {
 
   shift
 
-  check_2 "tools/perf/trace/beauty/$file" "$file" $*
+  check_2 "tools/perf/trace/beauty/$file" "$file" "$@"
 }
 
 # Check if we have the kernel headers (tools/perf/../../include), else
@@ -183,7 +183,7 @@ done
 check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
 check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
 
-cd tools/perf
+cd tools/perf || exit
 
 if [ ${#FAILURES[@]} -gt 0 ]
 then
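
The "$@" quoting fix above matters once a forwarded argument contains whitespace;
a minimal standalone sketch of the difference:

  set -- "a b" c
  printf '<%s>\n' $*      # unquoted: re-split into three words: <a> <b> <c>
  printf '<%s>\n' "$@"    # quoted: the original two arguments: <a b> <c>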
index 150765f..1d18bb8 100644 (file)
@@ -35,3 +35,9 @@ $(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_L
        $(call rule_mkdir)
        $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@
 endif
+
+# pmu-events.c file is generated in the OUTPUT directory so it needs a
+# separate rule to depend on it properly
+$(OUTPUT)pmu-events/pmu-events.o: $(PMU_EVENTS_C)
+       $(call rule_mkdir)
+       $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/ali_drw.json
new file mode 100644 (file)
index 0000000..e21c469
--- /dev/null
@@ -0,0 +1,373 @@
+[
+       {
+               "BriefDescription": "A Write or Read Op at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x0",
+               "EventName": "hif_rd_or_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Write Op at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x1",
+               "EventName": "hif_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read Op at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x2",
+               "EventName": "hif_rd",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read-Modify-Write Op at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x3",
+               "EventName": "hif_rmw",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A high priority Read at HIF interface. The unit is 64B.",
+               "ConfigCode": "0x4",
+               "EventName": "hif_hi_pri_rd",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A write data cycle at DFI interface (to DRAM).",
+               "ConfigCode": "0x7",
+               "EventName": "dfi_wr_data_cycles",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A read data cycle at DFI interface (to DRAM).",
+               "ConfigCode": "0x8",
+               "EventName": "dfi_rd_data_cycles",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A high priority read becomes critical.",
+               "ConfigCode": "0x9",
+               "EventName": "hpr_xact_when_critical",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A low priority read becomes critical.",
+               "ConfigCode": "0xA",
+               "EventName": "lpr_xact_when_critical",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A write becomes critical.",
+               "ConfigCode": "0xB",
+               "EventName": "wr_xact_when_critical",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "An Activate(ACT) command to DRAM.",
+               "ConfigCode": "0xC",
+               "EventName": "op_is_activate",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read or Write CAS command to DRAM.",
+               "ConfigCode": "0xD",
+               "EventName": "op_is_rd_or_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "An Activate(ACT) command for read to DRAM.",
+               "ConfigCode": "0xE",
+               "EventName": "op_is_rd_activate",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read CAS command to DRAM.",
+               "ConfigCode": "0xF",
+               "EventName": "op_is_rd",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Write CAS command to DRAM.",
+               "ConfigCode": "0x10",
+               "EventName": "op_is_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Masked Write command to DRAM.",
+               "ConfigCode": "0x11",
+               "EventName": "op_is_mwr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Precharge(PRE) command to DRAM.",
+               "ConfigCode": "0x12",
+               "EventName": "op_is_precharge",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Precharge(PRE) required by read or write.",
+               "ConfigCode": "0x13",
+               "EventName": "precharge_for_rdwr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Precharge(PRE) required by other conditions.",
+               "ConfigCode": "0x14",
+               "EventName": "precharge_for_other",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A read-write turnaround.",
+               "ConfigCode": "0x15",
+               "EventName": "rdwr_transitions",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A write combine(merge) in write data buffer.",
+               "ConfigCode": "0x16",
+               "EventName": "write_combine",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Write-After-Read hazard.",
+               "ConfigCode": "0x17",
+               "EventName": "war_hazard",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Read-After-Write hazard.",
+               "ConfigCode": "0x18",
+               "EventName": "raw_hazard",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Write-After-Write hazard.",
+               "ConfigCode": "0x19",
+               "EventName": "waw_hazard",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank0 enters self-refresh(SRE).",
+               "ConfigCode": "0x1A",
+               "EventName": "op_is_enter_selfref_rk0",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank1 enters self-refresh(SRE).",
+               "ConfigCode": "0x1B",
+               "EventName": "op_is_enter_selfref_rk1",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank2 enters self-refresh(SRE).",
+               "ConfigCode": "0x1C",
+               "EventName": "op_is_enter_selfref_rk2",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank3 enters self-refresh(SRE).",
+               "ConfigCode": "0x1D",
+               "EventName": "op_is_enter_selfref_rk3",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank0 enters power-down(PDE).",
+               "ConfigCode": "0x1E",
+               "EventName": "op_is_enter_powerdown_rk0",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank1 enters power-down(PDE).",
+               "ConfigCode": "0x1F",
+               "EventName": "op_is_enter_powerdown_rk1",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank2 enters power-down(PDE).",
+               "ConfigCode": "0x20",
+               "EventName": "op_is_enter_powerdown_rk2",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "Rank3 enters power-down(PDE).",
+               "ConfigCode": "0x21",
+               "EventName": "op_is_enter_powerdown_rk3",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A cycle that Rank0 stays in self-refresh mode.",
+               "ConfigCode": "0x26",
+               "EventName": "selfref_mode_rk0",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A cycle that Rank1 stays in self-refresh mode.",
+               "ConfigCode": "0x27",
+               "EventName": "selfref_mode_rk1",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A cycle that Rank2 stays in self-refresh mode.",
+               "ConfigCode": "0x28",
+               "EventName": "selfref_mode_rk2",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A cycle that Rank3 stays in self-refresh mode.",
+               "ConfigCode": "0x29",
+               "EventName": "selfref_mode_rk3",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "An auto-refresh(REF) command to DRAM.",
+               "ConfigCode": "0x2A",
+               "EventName": "op_is_refresh",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A critical auto-refresh(REF) command to DRAM.",
+               "ConfigCode": "0x2B",
+               "EventName": "op_is_crit_ref",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "An MRR or MRW command to DRAM.",
+               "ConfigCode": "0x2D",
+               "EventName": "op_is_load_mode",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A ZQCal command to DRAM.",
+               "ConfigCode": "0x2E",
+               "EventName": "op_is_zqcl",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "At least one entry in read queue reaches the visible window limit.",
+               "ConfigCode": "0x30",
+               "EventName": "visible_window_limit_reached_rd",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "At least one entry in write queue reaches the visible window limit.",
+               "ConfigCode": "0x31",
+               "EventName": "visible_window_limit_reached_wr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A DQS Oscillator MPC command to DRAM.",
+               "ConfigCode": "0x34",
+               "EventName": "op_is_dqsosc_mpc",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A DQS Oscillator MRR command to DRAM.",
+               "ConfigCode": "0x35",
+               "EventName": "op_is_dqsosc_mrr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A Temperature Compensated Refresh(TCR) MRR command to DRAM.",
+               "ConfigCode": "0x36",
+               "EventName": "op_is_tcr_mrr",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A ZQCal Start command to DRAM.",
+               "ConfigCode": "0x37",
+               "EventName": "op_is_zqstart",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A ZQCal Latch command to DRAM.",
+               "ConfigCode": "0x38",
+               "EventName": "op_is_zqlatch",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A packet at CHI TXREQ interface (request).",
+               "ConfigCode": "0x39",
+               "EventName": "chi_txreq",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A packet at CHI TXDAT interface (read data).",
+               "ConfigCode": "0x3A",
+               "EventName": "chi_txdat",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A packet at CHI RXDAT interface (write data).",
+               "ConfigCode": "0x3B",
+               "EventName": "chi_rxdat",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A packet at CHI RXRSP interface.",
+               "ConfigCode": "0x3C",
+               "EventName": "chi_rxrsp",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "A violation detected in TZC.",
+               "ConfigCode": "0x3D",
+               "EventName": "tsz_vio",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "BriefDescription": "The ddr cycles.",
+               "ConfigCode": "0x80",
+               "EventName": "ddr_cycles",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/yitian710/sys/metrics.json
new file mode 100644 (file)
index 0000000..bc865b3
--- /dev/null
@@ -0,0 +1,20 @@
+[
+       {
+               "MetricName": "ddr_read_bandwidth.all",
+               "BriefDescription": "The ddr read bandwidth(MB/s).",
+               "MetricGroup": "ali_drw",
+               "MetricExpr": "hif_rd * 64 / 1e6 / duration_time",
+               "ScaleUnit": "1MB/s",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       },
+       {
+               "MetricName": "ddr_write_bandwidth.all",
+               "BriefDescription": "The ddr write bandwidth(MB/s).",
+               "MetricGroup": "ali_drw",
+               "MetricExpr": "(hif_wr + hif_rmw) * 64 / 1e6 / duration_time",
+               "ScaleUnit": "1MB/s",
+               "Unit": "ali_drw",
+               "Compat": "ali_drw_pmu"
+       }
+]
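
Worked hypothetical example for the write metric above: hif_wr = 5e8 and
hif_rmw = 1e8 over a 10 second run give (5e8 + 1e8) * 64 / 1e6 / 10 = 3840 MB/s,
reported per the 1MB/s ScaleUnit.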
index daf9458..c6780d5 100644 (file)
     },
     {
         "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / tma_info_core_clks - max((cpu_atom@MEM_BOUND_STALLS.LOAD@ - cpu_atom@LD_HEAD.L1_MISS_AT_RET@) / tma_info_core_clks, 0) * cpu_atom@MEM_BOUND_STALLS.LOAD_LLC_HIT@ / cpu_atom@MEM_BOUND_STALLS.LOAD@",
         "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@) + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc",
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu_core@FP_ARITH_DISPATCHED.PORT_0@ + cpu_core@FP_ARITH_DISPATCHED.PORT_1@ + cpu_core@FP_ARITH_DISPATCHED.PORT_5@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu_core@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire",
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(cpu_core@MEMORY_ACTIVITY.STALLS_L2_MISS@ - cpu_core@MEMORY_ACTIVITY.STALLS_L3_MISS@) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 0f1628d..06e67e3 100644 (file)
     },
     {
         "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / tma_info_core_clks - max((MEM_BOUND_STALLS.LOAD - LD_HEAD.L1_MISS_AT_RET) / tma_info_core_clks, 0) * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD",
         "MetricGroup": "TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / tma_info_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 8fcc05c..a6eed0d 100644 (file)
@@ -85,6 +85,7 @@
     },
     {
         "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_4k_aliasing",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 9bb7e3f..7082ad5 100644 (file)
     },
     {
         "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_4k_aliasing",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 6650100..3a8770e 100644 (file)
@@ -19,12 +19,12 @@ GenuineIntel-6-3A,v24,ivybridge,core
 GenuineIntel-6-3E,v23,ivytown,core
 GenuineIntel-6-2D,v23,jaketown,core
 GenuineIntel-6-(57|85),v10,knightslanding,core
-GenuineIntel-6-A[AC],v1.03,meteorlake,core
+GenuineIntel-6-A[AC],v1.04,meteorlake,core
 GenuineIntel-6-1[AEF],v3,nehalemep,core
 GenuineIntel-6-2E,v3,nehalemex,core
 GenuineIntel-6-A7,v1.01,rocketlake,core
 GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-(8F|CF),v1.14,sapphirerapids,core
+GenuineIntel-6-(8F|CF),v1.15,sapphirerapids,core
 GenuineIntel-6-AF,v1.00,sierraforest,core
 GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
 GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v57,skylake,core
index e1ae7c9..1de0200 100644 (file)
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
+        "EventCode": "0x48",
+        "EventName": "L1D_PEND_MISS.L2_STALLS",
+        "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Number of L1D misses that are outstanding",
         "EventCode": "0x48",
         "EventName": "L1D_PEND_MISS.PENDING",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles when L1D is locked",
+        "EventCode": "0x42",
+        "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+        "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
         "EventCode": "0x2e",
         "EventName": "LONGEST_LAT_CACHE.MISS",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "Data_LA": "1",
+        "EventCode": "0xd2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
         "Data_LA": "1",
         "EventCode": "0xd2",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
+        "EventCode": "0x44",
+        "EventName": "MEM_STORE_RETIRED.L2_HIT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of load ops retired.",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cacheable and Non-Cacheable code read requests",
+        "EventCode": "0x21",
+        "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+        "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Demand Data Read requests sent to uncore",
         "EventCode": "0x21",
         "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+        "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles where at least 1 outstanding demand data read request is pending.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+        "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+        "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+        "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending.   Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+        "CounterMask": "6",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Store Read transactions pending for off-core. Highly correlated.",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+        "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts bus locks, accounts for cache line split locks and UC locks.",
         "EventCode": "0x2c",
         "EventName": "SQ_MISC.BUS_LOCK",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+        "EventCode": "0x40",
+        "EventName": "SW_PREFETCH_ACCESS.NTA",
+        "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHW instructions executed.",
+        "EventCode": "0x40",
+        "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+        "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+        "EventCode": "0x40",
+        "EventName": "SW_PREFETCH_ACCESS.T0",
+        "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "EventCode": "0x40",
+        "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+        "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to an icache miss",
         "EventCode": "0x71",
         "EventName": "TOPDOWN_FE_BOUND.ICACHE",
index 616489f..f66506e 100644 (file)
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
+        "EventCode": "0xb3",
+        "EventName": "FP_ARITH_DISPATCHED.PORT_5",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
index 0f06451..8264419 100644 (file)
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "DSB_FILL.FB_STALL_OT",
+        "EventCode": "0x62",
+        "EventName": "DSB_FILL.FB_STALL_OT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Retired ANT branches",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.ANY_ANT",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired Instructions who experienced DSB miss.",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x1",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x3",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired Instructions who experienced a critical DSB miss.",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.DSB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x11",
+        "PEBS": "1",
+        "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x3",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of instructions retired that were tagged because empty issue slots were seen before the uop due to ITLB miss",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.ITLB_MISS",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.L2_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x13",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x3",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
+        "EventCode": "0xc6",
+        "EventName": "FRONTEND_RETIRED.STLB_MISS",
+        "MSRIndex": "0x3F7",
+        "MSRValue": "0x15",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x3",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
index 67e949b..2605e1d 100644 (file)
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "Number of machine clears due to memory ordering conflicts.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
         "CounterMask": "3",
         "EventCode": "0x47",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "MEMORY_ORDERING.MD_NUKE",
+        "EventCode": "0x09",
+        "EventName": "MEMORY_ORDERING.MD_NUKE",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of memory ordering machine clears due to memory renaming.",
+        "EventCode": "0x09",
+        "EventName": "MEMORY_ORDERING.MRN_NUKE",
+        "SampleAfterValue": "100003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x400",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "53",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles.",
+        "Data_LA": "1",
+        "EventCode": "0xcd",
+        "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x800",
+        "PEBS": "2",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles.  Reported latency may be longer than just the memory latency.",
+        "SampleAfterValue": "23",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "SampleAfterValue": "100003",
         "UMask": "0x10",
         "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles where data return is pending for a Demand Data Read request who miss L3 cache.",
+        "CounterMask": "1",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.  Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Cycles where the core is waiting on at least 6 outstanding demand data read requests known to have missed the L3 cache.",
+        "CounterMask": "6",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
+        "PublicDescription": "Cycles where the core is waiting on at least 6 outstanding demand data read requests known to have missed the L3 cache.  Note that this event does not capture all elapsed cycles while the requests are outstanding - only cycles from when the requests were known to have missed the L3 cache.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
     }
 ]
index 2ec57f4..f4c6035 100644 (file)
@@ -1,5 +1,13 @@
 [
     {
+        "BriefDescription": "ASSISTS.PAGE_FAULT",
+        "EventCode": "0xc1",
+        "EventName": "ASSISTS.PAGE_FAULT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts streaming stores that have any type of response.",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "RS.EMPTY_RESOURCE",
+        "EventCode": "0xa5",
+        "EventName": "RS.EMPTY_RESOURCE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of issue slots in a UMWAIT or TPAUSE instruction where no uop issues due to the instruction putting the CPU into the C0.1 activity state. For Tremont, UMWAIT and TPAUSE will only put the CPU into C0.1 activity state (not C0.2 activity state)",
         "EventCode": "0x75",
         "EventName": "SERIALIZATION.C01_MS_SCB",
index eeaa7a9..352c5ef 100644 (file)
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.RET",
+        "PEBS": "1",
+        "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
         "EventCode": "0xc5",
         "EventName": "BR_MISP_RETIRED.RETURN",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.C01",
+        "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.  This state can be entered via the TPAUSE or UMWAIT instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.C02",
+        "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.  This state can be entered via the TPAUSE or UMWAIT instructions.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x20",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.C0_WAIT",
+        "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x70",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles",
         "EventName": "CPU_CLK_UNHALTED.CORE",
         "SampleAfterValue": "2000003",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "CPU_CLK_UNHALTED.PAUSE",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.PAUSE",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0xec",
+        "EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x40",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
         "EventCode": "0x3c",
         "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Retired NOP instructions.",
+        "EventCode": "0xc0",
+        "EventName": "INST_RETIRED.NOP",
+        "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Precise instruction retired with PEBS precise-distribution",
         "EventName": "INST_RETIRED.PREC_DIST",
         "PEBS": "1",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Iterations of Repeat string retired instructions.",
+        "EventCode": "0xc0",
+        "EventName": "INST_RETIRED.REP_ITERATION",
+        "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Cycles the Backend cluster is recovering after a miss-speculation or a Store Buffer or Load Buffer drain stall.",
         "CounterMask": "1",
         "EventCode": "0xad",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Clears speculative count",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0xad",
+        "EventName": "INT_MISC.CLEARS_COUNT",
+        "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
+        "SampleAfterValue": "500009",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
         "EventCode": "0xad",
         "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread",
+        "EventCode": "0xad",
+        "EventName": "INT_MISC.RAT_STALLS",
+        "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
         "EventCode": "0xad",
         "EventName": "INT_MISC.RECOVERY_CYCLES",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+        "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
         "EventCode": "0x03",
         "EventName": "LD_BLOCKS.DATA_UNKNOWN",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.NO_SR",
+        "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x88",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts the number of retired loads that are blocked because its address partially overlapped with an older store.",
         "EventCode": "0x03",
         "EventName": "LD_BLOCKS.STORE_FORWARD",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.STORE_FORWARD",
+        "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x82",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
         "CounterMask": "1",
         "EventCode": "0xa8",
         "Unit": "cpu_atom"
     },
     {
+        "BriefDescription": "Self-modifying code (SMC) detected.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "LFENCE instructions retired",
+        "EventCode": "0xe0",
+        "EventName": "MISC2_RETIRED.LFENCE",
+        "PublicDescription": "number of LFENCE retired instructions",
+        "SampleAfterValue": "400009",
+        "UMask": "0x20",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
         "EventCode": "0xa2",
         "EventName": "RESOURCE_STALLS.SCOREBOARD",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles with retired uop(s).",
+        "CounterMask": "1",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.CYCLES",
+        "PublicDescription": "Counts cycles where at least one uop has retired.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Retired uops except the last uop of each instruction.",
         "EventCode": "0xc2",
         "EventName": "UOPS_RETIRED.HEAVY",
         "Unit": "cpu_core"
     },
     {
+        "BriefDescription": "Cycles without actually retired uops.",
+        "CounterMask": "1",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.STALLS",
+        "Invert": "1",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
         "BriefDescription": "Cycles with less than 10 actually retired uops.",
         "CounterMask": "10",
         "EventCode": "0xc2",
index 1bb9ced..a0191c8 100644 (file)
@@ -85,6 +85,7 @@
     },
     {
         "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_4k_aliasing",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 31b6be9..442ef38 100644 (file)
         "UMask": "0x1"
     },
     {
+        "BriefDescription": "Counts demand data reads that were supplied by PMM attached to this socket, whether or not in Sub NUMA Cluster(SNC) Mode.  In SNC Mode counts PMM accesses that are controlled by the close or distant SNC Cluster.",
+        "EventCode": "0x2A,0x2B",
+        "EventName": "OCR.DEMAND_DATA_RD.LOCAL_SOCKET_PMM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x700C00001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by PMM.",
+        "EventCode": "0x2A,0x2B",
+        "EventName": "OCR.DEMAND_DATA_RD.PMM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x703C00001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
         "BriefDescription": "Counts demand data reads that were supplied by DRAM attached to another socket.",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_DATA_RD.REMOTE_DRAM",
index c207c85..222212a 100644 (file)
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector + tma_fp_amx",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_pmm_bound + tma_store_bound) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + FP_ARITH_INST_RETIRED2.SCALAR_HALF + 2 * (FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF) + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * (FP_ARITH_INST_RETIRED2.128B_PACKED_HALF + cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@) + 16 * (FP_ARITH_INST_RETIRED2.256B_PACKED_HALF + FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) + 32 * FP_ARITH_INST_RETIRED2.512B_PACKED_HALF + 4 * AMX_OPS_RETIRED.BF16",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(FP_ARITH_DISPATCHED.PORT_0 + FP_ARITH_DISPATCHED.PORT_1 + FP_ARITH_DISPATCHED.PORT_5) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L2_MISS - MEMORY_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index c7c2d6a..fab084e 100644 (file)
@@ -79,6 +79,7 @@
     },
     {
         "BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_4k_aliasing",
     },
     {
         "BriefDescription": "This metric represents overall arithmetic floating-point (FP) operations fraction the CPU has executed (retired)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_x87_use + tma_fp_scalar + tma_fp_vector",
         "MetricGroup": "HPC;TopdownL3;tma_L3_group;tma_light_operations_group",
         "MetricName": "tma_fp_arith",
     },
     {
         "BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
         "MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
         "MetricName": "tma_info_botlnk_l2_ic_misses",
     },
     {
         "BriefDescription": "Total pipeline cost of (external) Memory Bandwidth related bottlenecks",
+        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "100 * tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full))) + tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_lock_latency + tma_split_loads + tma_store_fwd_blk))",
         "MetricGroup": "Mem;MemoryBW;Offcore;tma_issueBW",
         "MetricName": "tma_info_bottleneck_memory_bandwidth",
     },
     {
         "BriefDescription": "Floating Point Operations Per Cycle",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + 2 * FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4 * cpu@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE\\,umask\\=0x18@ + 8 * cpu@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE\\,umask\\=0x60@ + 16 * FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE) / tma_info_core_core_clks",
         "MetricGroup": "Flops;Ret",
         "MetricName": "tma_info_core_flopc"
     },
     {
         "BriefDescription": "Actual per-core usage of the Floating Point non-X87 execution units (regardless of precision or vector-width)",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "(cpu@FP_ARITH_INST_RETIRED.SCALAR_SINGLE\\,umask\\=0x03@ + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@) / (2 * tma_info_core_core_clks)",
         "MetricGroup": "Cor;Flops;HPC",
         "MetricName": "tma_info_core_fp_arith_utilization",
     },
     {
         "BriefDescription": "Average number of Uops retired in cycles where at least one uop has retired.",
-        "MetricConstraint": "NO_GROUP_EVENTS",
         "MetricExpr": "tma_retiring * tma_info_thread_slots / cpu@UOPS_RETIRED.SLOTS\\,cmask\\=1@",
         "MetricGroup": "Pipeline;Ret",
         "MetricName": "tma_info_pipeline_retire"
     },
     {
         "BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
         "MetricGroup": "CacheMisses;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
         "MetricName": "tma_l3_bound",
     },
     {
         "BriefDescription": "This metric represents rate of split store accesses",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
         "MetricName": "tma_split_stores",
     },
     {
         "BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
+        "MetricConstraint": "NO_GROUP_EVENTS_NMI",
         "MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
         "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
         "MetricName": "tma_store_fwd_blk",
index 12e80bb..8cd561a 100755 (executable)
@@ -274,6 +274,7 @@ class JsonEvent:
           'DFPMC': 'amd_df',
           'cpu_core': 'cpu_core',
           'cpu_atom': 'cpu_atom',
+          'ali_drw': 'ali_drw',
       }
       return table[unit] if unit in table else f'uncore_{unit.lower()}'
 
@@ -999,7 +1000,7 @@ such as "arm/cortex-a34".''',
   _args = ap.parse_args()
 
   _args.output_file.write("""
-#include "pmu-events/pmu-events.h"
+#include <pmu-events/pmu-events.h>
 #include "util/header.h"
 #include "util/pmu.h"
 #include <string.h>
index 7d0e33c..5b0b5ff 100644 (file)
@@ -1,3 +1,4 @@
 perf-y += Context.o
 
-CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
+# -Wno-declaration-after-statement: The python headers mix code with declarations (decls after asserts, for instance)
+CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-declaration-after-statement
diff --git a/tools/perf/scripts/python/bin/gecko-record b/tools/perf/scripts/python/bin/gecko-record
new file mode 100644 (file)
index 0000000..f0d1aa5
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -F 99 -g "$@"
diff --git a/tools/perf/scripts/python/bin/gecko-report b/tools/perf/scripts/python/bin/gecko-report
new file mode 100644 (file)
index 0000000..0c12cc0
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: create firefox gecko profile json format from perf.data
+perf script "$@" -s "$PERF_EXEC_PATH"/scripts/python/gecko.py
diff --git a/tools/perf/scripts/python/gecko.py b/tools/perf/scripts/python/gecko.py
new file mode 100644 (file)
index 0000000..278c3ae
--- /dev/null
@@ -0,0 +1,339 @@
+# gecko.py - Convert perf record output to Firefox's gecko profile format
+# SPDX-License-Identifier: GPL-2.0
+#
+# The script converts perf.data to Gecko Profile Format,
+# which can be read by https://profiler.firefox.com/.
+#
+# Usage:
+#
+#     perf record -a -g -F 99 sleep 60
+#     perf script report gecko > output.json
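+#
+# The gecko-record and gecko-report wrappers added under
+# tools/perf/scripts/python/bin/ drive these two steps, so the same profile
+# can also be produced with "perf script record gecko" and
+# "perf script report gecko".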
+
+import os
+import sys
+import json
+import argparse
+from functools import reduce
+from dataclasses import dataclass, field
+from typing import List, Dict, Optional, NamedTuple, Set, Tuple, Any
+
+# Add the Perf-Trace-Util library to the Python path
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+       '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+StringID = int
+StackID = int
+FrameID = int
+CategoryID = int
+Milliseconds = float
+
+# start_time is initialized only once for all event traces.
+start_time = None
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/profile.js#L425
+# Follow Brendan Gregg's Flamegraph convention: orange for kernel and yellow for user space by default.
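+# CATEGORIES is left as None here and is expected to be filled in at runtime
+# before the profile is written out; the *_CATEGORY_INDEX constants below
+# index into that list.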
+CATEGORIES = None
+
+# The product name is used by the profiler UI to show the Operating system and Processor.
+PRODUCT = os.popen('uname -op').read().strip()
+
+# Here key = tid, value = Thread
+tid_to_thread = dict()
+
+# The category index is used by the profiler UI to show the color of the flame graph.
+USER_CATEGORY_INDEX = 0
+KERNEL_CATEGORY_INDEX = 1
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
+class Frame(NamedTuple):
+       string_id: StringID
+       relevantForJS: bool
+       innerWindowID: int
+       implementation: None
+       optimizations: None
+       line: None
+       column: None
+       category: CategoryID
+       subcategory: int
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
+class Stack(NamedTuple):
+       prefix_id: Optional[StackID]
+       frame_id: FrameID
+
+# https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
+class Sample(NamedTuple):
+       stack_id: Optional[StackID]
+       time_ms: Milliseconds
+       responsiveness: int
+
+@dataclass
+class Thread:
+       """A builder for a profile of the thread.
+
+       Attributes:
+               comm: Thread command-line (name).
+               pid: process ID of containing process.
+               tid: thread ID.
+               samples: Timeline of profile samples.
+               frameTable: interned stack frame ID -> stack frame.
+               stringTable: interned string ID -> string.
+               stringMap: interned string -> string ID.
+               stackTable: interned stack ID -> stack.
+               stackMap: (stack prefix ID, leaf stack frame ID) -> interned Stack ID.
+               frameMap: Stack Frame string -> interned Frame ID.
+       """
+       comm: str
+       pid: int
+       tid: int
+       samples: List[Sample] = field(default_factory=list)
+       frameTable: List[Frame] = field(default_factory=list)
+       stringTable: List[str] = field(default_factory=list)
+       stringMap: Dict[str, int] = field(default_factory=dict)
+       stackTable: List[Stack] = field(default_factory=list)
+       stackMap: Dict[Tuple[Optional[int], int], int] = field(default_factory=dict)
+       frameMap: Dict[str, int] = field(default_factory=dict)
+
+       def _intern_stack(self, frame_id: int, prefix_id: Optional[int]) -> int:
+               """Gets a matching stack, or saves the new stack. Returns a Stack ID."""
+               key = f"{frame_id}" if prefix_id is None else f"{frame_id},{prefix_id}"
+               stack_id = self.stackMap.get(key)
+               if stack_id is None:
+                       stack_id = len(self.stackTable)
+                       self.stackTable.append(Stack(prefix_id=prefix_id, frame_id=frame_id))
+                       self.stackMap[key] = stack_id
+               return stack_id
+
+       def _intern_string(self, string: str) -> int:
+               """Gets a matching string, or saves the new string. Returns a String ID."""
+               string_id = self.stringMap.get(string)
+               if string_id is not None:
+                       return string_id
+               string_id = len(self.stringTable)
+               self.stringTable.append(string)
+               self.stringMap[string] = string_id
+               return string_id
+
+       def _intern_frame(self, frame_str: str) -> int:
+               """Gets a matching stack frame, or saves the new frame. Returns a Frame ID."""
+               frame_id = self.frameMap.get(frame_str)
+               if frame_id is not None:
+                       return frame_id
+               frame_id = len(self.frameTable)
+               self.frameMap[frame_str] = frame_id
+               string_id = self._intern_string(frame_str)
+
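+		# Classify the frame: symbols resolved from kallsyms, vmlinux or kernel
+		# modules ("*.ko") are treated as kernel frames; everything else is user space.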
+               symbol_name_to_category = KERNEL_CATEGORY_INDEX if frame_str.find('kallsyms') != -1 \
+               or frame_str.find('/vmlinux') != -1 \
+               or frame_str.endswith('.ko)') \
+               else USER_CATEGORY_INDEX
+
+               self.frameTable.append(Frame(
+                       string_id=string_id,
+                       relevantForJS=False,
+                       innerWindowID=0,
+                       implementation=None,
+                       optimizations=None,
+                       line=None,
+                       column=None,
+                       category=symbol_name_to_category,
+                       subcategory=None,
+               ))
+               return frame_id
+
+       def _add_sample(self, comm: str, stack: List[str], time_ms: Milliseconds) -> None:
+               """Add a timestamped stack trace sample to the thread builder.
+               Args:
+                       comm: command-line (name) of the thread at this sample
+                       stack: sampled stack frames. Root first, leaf last.
+                       time_ms: timestamp of sample in milliseconds.
+               """
+		# Threads may not set their names right after they are created.
+		# Instead, they might do it later. In such situations, use the latest name they have set.
+               if self.comm != comm:
+                       self.comm = comm
+
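+		# Intern the frames root-to-leaf, chaining each one to the previous
+		# prefix; the final stack ID identifies the whole call stack.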
+		prefix_stack_id = reduce(
+			lambda prefix_id, frame: self._intern_stack(self._intern_frame(frame), prefix_id),
+			stack, None)
+               if prefix_stack_id is not None:
+                       self.samples.append(Sample(stack_id=prefix_stack_id,
+                                                                       time_ms=time_ms,
+                                                                       responsiveness=0))
+
+       def _to_json_dict(self) -> Dict:
+               """Converts current Thread to GeckoThread JSON format."""
+		# The Gecko profile format is row-oriented data as List[List],
+		# with a schema for interpreting each index.
+               # Schema:
+               # https://github.com/firefox-devtools/profiler/blob/main/docs-developer/gecko-profile-format.md
+               # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L230
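+               # For example, a single samples row is [stack_id, time_ms, responsiveness],
+               # e.g. [3, 1234.5, 0] (values illustrative only).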
+               return {
+                       "tid": self.tid,
+                       "pid": self.pid,
+                       "name": self.comm,
+                       # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L51
+                       "markers": {
+                               "schema": {
+                                       "name": 0,
+                                       "startTime": 1,
+                                       "endTime": 2,
+                                       "phase": 3,
+                                       "category": 4,
+                                       "data": 5,
+                               },
+                               "data": [],
+                       },
+
+                       # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L90
+                       "samples": {
+                               "schema": {
+                                       "stack": 0,
+                                       "time": 1,
+                                       "responsiveness": 2,
+                               },
+                               "data": self.samples
+                       },
+
+                       # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L156
+                       "frameTable": {
+                               "schema": {
+                                       "location": 0,
+                                       "relevantForJS": 1,
+                                       "innerWindowID": 2,
+                                       "implementation": 3,
+                                       "optimizations": 4,
+                                       "line": 5,
+                                       "column": 6,
+                                       "category": 7,
+                                       "subcategory": 8,
+                               },
+                               "data": self.frameTable,
+                       },
+
+                       # https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L216
+                       "stackTable": {
+                               "schema": {
+                                       "prefix": 0,
+                                       "frame": 1,
+                               },
+                               "data": self.stackTable,
+                       },
+                       "stringTable": self.stringTable,
+                       "registerTime": 0,
+                       "unregisterTime": None,
+                       "processType": "default",
+               }
+
+# Uses the perf script Python interface to parse each
+# event and store the data in the thread builder.
+def process_event(param_dict: Dict) -> None:
+       global start_time
+       global tid_to_thread
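+       # Convert the sample timestamp from nanoseconds to milliseconds
+       # (integer microseconds first, then a float in ms).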
+       time_stamp = (param_dict['sample']['time'] // 1000) / 1000
+       pid = param_dict['sample']['pid']
+       tid = param_dict['sample']['tid']
+       comm = param_dict['comm']
+
+       # Start time is the time of the first sample
+       if not start_time:
+               start_time = time_stamp
+
+       # Parse and append the callchain of the current sample into a stack.
+       stack = []
+       if param_dict['callchain']:
+               for call in param_dict['callchain']:
+                       if 'sym' not in call:
+                               continue
+                       stack.append(f'{call["sym"]["name"]} (in {call["dso"]})')
+               if len(stack) != 0:
+                       # Reverse the stack so the root comes first and the leaf is at the end.
+                       stack = stack[::-1]
+
+       # During perf record, if -g is not used, the callchain is not available.
+       # In that case, the symbol and dso are available in the event parameters.
+       else:
+               func = param_dict['symbol'] if 'symbol' in param_dict else '[unknown]'
+               dso = param_dict['dso'] if 'dso' in param_dict else '[unknown]'
+               stack.append(f'{func} (in {dso})')
+
+       # Add sample to the specific thread.
+       thread = tid_to_thread.get(tid)
+       if thread is None:
+               thread = Thread(comm=comm, pid=pid, tid=tid)
+               tid_to_thread[tid] = thread
+       thread._add_sample(comm=comm, stack=stack, time_ms=time_stamp)
+
+# trace_end runs at the end and aggregates the data into
+# the final JSON object, printing it to stdout.
+def trace_end() -> None:
+       threads = [thread._to_json_dict() for thread in tid_to_thread.values()]
+
+       # Schema: https://github.com/firefox-devtools/profiler/blob/53970305b51b9b472e26d7457fee1d66cd4e2737/src/types/gecko-profile.js#L305
+       gecko_profile_with_meta = {
+               "meta": {
+                       "interval": 1,
+                       "processType": 0,
+                       "product": PRODUCT,
+                       "stackwalk": 1,
+                       "debug": 0,
+                       "gcpoison": 0,
+                       "asyncstack": 1,
+                       "startTime": start_time,
+                       "shutdownTime": None,
+                       "version": 24,
+                       "presymbolicated": True,
+                       "categories": CATEGORIES,
+                       "markerSchema": [],
+                       },
+               "libs": [],
+               "threads": threads,
+               "processes": [],
+               "pausedRanges": [],
+       }
+       json.dump(gecko_profile_with_meta, sys.stdout, indent=2)
+
+def main() -> None:
+       global CATEGORIES
+       parser = argparse.ArgumentParser(description="Convert perf.data to Firefox's Gecko Profile format")
+
+       # Add the command-line options
+       # Colors must be defined according to this:
+       # https://github.com/firefox-devtools/profiler/blob/50124adbfa488adba6e2674a8f2618cf34b59cd2/res/css/categories.css
+       parser.add_argument('--user-color', default='yellow', help='Color for the User category')
+       parser.add_argument('--kernel-color', default='orange', help='Color for the Kernel category')
+       # Parse the command-line arguments
+       args = parser.parse_args()
+       # Access the values provided by the user
+       user_color = args.user_color
+       kernel_color = args.kernel_color
+
+       CATEGORIES = [
+               {
+                       "name": 'User',
+                       "color": user_color,
+                       "subcategories": ['Other']
+               },
+               {
+                       "name": 'Kernel',
+                       "color": kernel_color,
+                       "subcategories": ['Other']
+               },
+       ]
+
+if __name__ == '__main__':
+    main()
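
A hedged usage sketch, not part of the patch: assuming perf is built with Python scripting support and the script is installed as tools/perf/scripts/python/gecko.py (./my_workload is a placeholder), a profile could be produced and then loaded in the Firefox Profiler UI roughly like this:

    # record with call graphs so the script gets full stacks (-g, see the comment above)
    perf record -g -o perf.data -- ./my_workload
    perf script -s tools/perf/scripts/python/gecko.py > gecko_profile.json
    # gecko_profile.json can then be opened at https://profiler.firefox.com/
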
index 8beb460..9ccecd8 100644 (file)
 
 static int epoll_pwait_loop(void)
 {
+       struct epoll_event events;
        int i;
 
        /* Should fail NR_ITERS times */
        for (i = 0; i < NR_ITERS; i++)
-               epoll_pwait(-(i + 1), NULL, 0, 0, NULL);
+               epoll_pwait(-(i + 1), &events, 0, 0, NULL);
        return 0;
 }
 
@@ -124,7 +125,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
        parse_state.error = &parse_error;
        INIT_LIST_HEAD(&parse_state.list);
 
-       err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL);
+       err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL, NULL);
        parse_events_error__exit(&parse_error);
        if (err == -ENODATA) {
                pr_debug("Failed to add events selected by BPF, debuginfo package not installed\n");
index 1f6557c..6accb54 100644 (file)
 static bool dont_fork;
 const char *dso_to_test;
 
-struct test_suite *__weak arch_tests[] = {
+/*
+ * List of architecture specific tests. Not a weak symbol as the array length is
+ * dependent on the initialization, as such GCC with LTO complains of
+ * conflicting definitions with a weak symbol.
+ */
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__)
+extern struct test_suite *arch_tests[];
+#else
+static struct test_suite *arch_tests[] = {
        NULL,
 };
+#endif
 
 static struct test_suite *generic_tests[] = {
        &suite__vmlinux_matches_kallsyms,
diff --git a/tools/perf/tests/config-fragments/README b/tools/perf/tests/config-fragments/README
new file mode 100644 (file)
index 0000000..fe7de5d
--- /dev/null
@@ -0,0 +1,7 @@
+This folder is for kernel config fragments that can be merged with
+defconfig to give full test coverage of a perf test run. This is only
+an optimistic set, as some features also require hardware support in
+order to pass rather than skip.
+
+'config' is shared across all platforms; for arch-specific files, the
+file name should match that used in the ARCH=... make option.
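
A minimal sketch (not part of the patch) of one way these fragments could be merged into a test kernel config, assuming the standard scripts/kconfig/merge_config.sh helper and an arm64 build; the README does not prescribe a particular workflow:

    make ARCH=arm64 defconfig
    ./scripts/kconfig/merge_config.sh -m .config \
            tools/perf/tests/config-fragments/config \
            tools/perf/tests/config-fragments/arm64
    make ARCH=arm64 olddefconfig
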
diff --git a/tools/perf/tests/config-fragments/arm64 b/tools/perf/tests/config-fragments/arm64
new file mode 100644 (file)
index 0000000..64c4ab1
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_CORESIGHT_SOURCE_ETM4X=y
diff --git a/tools/perf/tests/config-fragments/config b/tools/perf/tests/config-fragments/config
new file mode 100644 (file)
index 0000000..c340b31
--- /dev/null
@@ -0,0 +1,11 @@
+CONFIG_TRACEPOINTS=y
+CONFIG_STACKTRACE=y
+CONFIG_NOP_TRACER=y
+CONFIG_RING_BUFFER=y
+CONFIG_EVENT_TRACING=y
+CONFIG_CONTEXT_SWITCH_TRACER=y
+CONFIG_TRACING=y
+CONFIG_GENERIC_TRACER=y
+CONFIG_FTRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_BRANCH_PROFILE_NONE=y
index 569e9d4..779bc86 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="asm_pure_loop"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname $0)"/../lib/coresight.sh
 ARGS=""
 DATV="out"
 DATA="$DATD/perf-$TEST-$DATV.data"
index d21ba85..08a44e5 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="memcpy_thread"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname $0)"/../lib/coresight.sh
 ARGS="16 10 1"
 DATV="16k_10"
 DATA="$DATD/perf-$TEST-$DATV.data"
index 7c13636..c83a200 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="thread_loop"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname $0)"/../lib/coresight.sh
 ARGS="10 1"
 DATV="check-tid-10th"
 DATA="$DATD/perf-$TEST-$DATV.data"
index a067145..6346fd5 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="thread_loop"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname $0)"/../lib/coresight.sh
 ARGS="2 20"
 DATV="check-tid-2th"
 DATA="$DATD/perf-$TEST-$DATV.data"
index f48c852..7304e3d 100755 (executable)
@@ -5,7 +5,7 @@
 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021
 
 TEST="unroll_loop_thread"
-. $(dirname $0)/../lib/coresight.sh
+. "$(dirname $0)"/../lib/coresight.sh
 ARGS="10"
 DATV="10"
 DATA="$DATD/perf-$TEST-$DATV.data"
index 51e3f60..5aa6e2e 100644 (file)
@@ -1,3 +1,4 @@
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
index 60c5e34..bf4c1fb 100644 (file)
@@ -1,3 +1,4 @@
+#!/bin/sh
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
 perf probe -l 2>&1 | grep -q probe:vfs_getname
@@ -10,11 +11,11 @@ cleanup_probe_vfs_getname() {
 }
 
 add_probe_vfs_getname() {
-       local verbose=$1
+       add_probe_verbose=$1
        if [ $had_vfs_getname -eq 1 ] ; then
                line=$(perf probe -L getname_flags 2>&1 | grep -E 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
                perf probe -q       "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
-               perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
+               perf probe $add_probe_verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
        fi
 }
 
index 698343f..3cc158a 100644 (file)
@@ -1,3 +1,4 @@
+#!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
 # Return true if perf_event_paranoid is > $1 and not running as root.
index e7a3913..bdd5a7c 100644 (file)
@@ -1,3 +1,4 @@
+#!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
 tenths=date\ +%s%1N
index 4a19442..d120e83 100755 (executable)
@@ -21,7 +21,7 @@ trap_cleanup() {
 trap trap_cleanup EXIT TERM INT
 
 check() {
-       if [ `id -u` != 0 ]; then
+       if [ "$(id -u)" != 0 ]; then
                echo "[Skip] No root permission"
                err=2
                exit
@@ -157,10 +157,10 @@ test_lock_filter()
        perf lock contention -i ${perfdata} -L tasklist_lock -q 2> ${result}
 
        # find out the type of tasklist_lock
-       local type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//')
+       test_lock_filter_type=$(head -1 "${result}" | awk '{ print $8 }' | sed -e 's/:.*//')
 
-       if [ "$(grep -c -v "${type}" "${result}")" != "0" ]; then
-               echo "[Fail] Recorded result should not have non-${type} locks:" "$(cat "${result}")"
+       if [ "$(grep -c -v "${test_lock_filter_type}" "${result}")" != "0" ]; then
+               echo "[Fail] Recorded result should not have non-${test_lock_filter_type} locks:" "$(cat "${result}")"
                err=1
                exit
        fi
@@ -170,8 +170,8 @@ test_lock_filter()
        fi
 
        perf lock con -a -b -L tasklist_lock -q -- perf bench sched messaging > /dev/null 2> ${result}
-       if [ "$(grep -c -v "${type}" "${result}")" != "0" ]; then
-               echo "[Fail] BPF result should not have non-${type} locks:" "$(cat "${result}")"
+       if [ "$(grep -c -v "${test_lock_filter_type}" "${result}")" != "0" ]; then
+               echo "[Fail] BPF result should not have non-${test_lock_filter_type} locks:" "$(cat "${result}")"
                err=1
                exit
        fi
index 5d1b63d..871243d 100755 (executable)
@@ -4,11 +4,11 @@
 # SPDX-License-Identifier: GPL-2.0
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
-. $(dirname $0)/lib/probe.sh
+. "$(dirname $0)"/lib/probe.sh
 
 skip_if_no_perf_probe || exit 2
 
-. $(dirname $0)/lib/probe_vfs_getname.sh
+. "$(dirname $0)"/lib/probe_vfs_getname.sh
 
 add_probe_vfs_getname || skip_if_no_debuginfo
 err=$?
index 49bd875..8929046 100755 (executable)
@@ -13,25 +13,25 @@ skip_if_no_z_record() {
 collect_z_record() {
        echo "Collecting compressed record file:"
        [ "$(uname -m)" != s390x ] && gflag='-g'
-       $perf_tool record -o $trace_file $gflag -z -F 5000 -- \
+       $perf_tool record -o "$trace_file" $gflag -z -F 5000 -- \
                dd count=500 if=/dev/urandom of=/dev/null
 }
 
 check_compressed_stats() {
        echo "Checking compressed events stats:"
-       $perf_tool report -i $trace_file --header --stats | \
+       $perf_tool report -i "$trace_file" --header --stats | \
                grep -E "(# compressed : Zstd,)|(COMPRESSED events:)"
 }
 
 check_compressed_output() {
-       $perf_tool inject -i $trace_file -o $trace_file.decomp &&
-       $perf_tool report -i $trace_file --stdio -F comm,dso,sym | head -n -3 > $trace_file.comp.output &&
-       $perf_tool report -i $trace_file.decomp --stdio -F comm,dso,sym | head -n -3 > $trace_file.decomp.output &&
-       diff $trace_file.comp.output $trace_file.decomp.output
+       $perf_tool inject -i "$trace_file" -o "$trace_file.decomp" &&
+       $perf_tool report -i "$trace_file" --stdio -F comm,dso,sym | head -n -3 > "$trace_file.comp.output" &&
+       $perf_tool report -i "$trace_file.decomp" --stdio -F comm,dso,sym | head -n -3 > "$trace_file.decomp.output" &&
+       diff "$trace_file.comp.output" "$trace_file.decomp.output"
 }
 
 skip_if_no_z_record || exit 2
 collect_z_record && check_compressed_stats && check_compressed_output
 err=$?
-rm -f $trace_file*
+rm -f "$trace_file"*
 exit $err
index f062ae9..a0d14cd 100755 (executable)
@@ -10,19 +10,19 @@ perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
 cleanup() {
   rm -f ${perfdata}
   rm -f ${perfdata}.old
-  trap - exit term int
+  trap - EXIT TERM INT
 }
 
 trap_cleanup() {
   cleanup
   exit 1
 }
-trap trap_cleanup exit term int
+trap trap_cleanup EXIT TERM INT
 
 test_offcpu_priv() {
   echo "Checking off-cpu privilege"
 
-  if [ `id -u` != 0 ]
+  if [ "$(id -u)" != 0 ]
   then
     echo "off-cpu test [Skipped permission]"
     err=2
index 34a0701..d890eb2 100755 (executable)
@@ -6,7 +6,7 @@
 
 set -e
 
-. $(dirname $0)/lib/stat_output.sh
+. "$(dirname $0)"/lib/stat_output.sh
 
 csv_sep=@
 
index 5571ff7..8bae9c8 100755 (executable)
@@ -10,7 +10,7 @@ set -e
 #
 perf stat -e cycles  -x' ' -I1000 --interval-count 1 --summary 2>&1 | \
 grep -e summary | \
-while read summary num event run pct
+while read summary _num _event _run _pct
 do
        if [ $summary != "summary" ]; then
                exit 1
@@ -23,7 +23,7 @@ done
 #
 perf stat -e cycles  -x' ' -I1000 --interval-count 1 --summary --no-csv-summary 2>&1 | \
 grep -e summary | \
-while read num event run pct
+while read _num _event _run _pct
 do
        exit 1
 done
index 0e9cba8..a1918a1 100755 (executable)
@@ -14,7 +14,7 @@ test_global_aggr()
 {
        perf stat -a --no-big-num -e cycles,instructions sleep 1  2>&1 | \
        grep -e cycles -e instructions | \
-       while read num evt hash ipc rest
+       while read num evt _hash ipc rest
        do
                # skip not counted events
                if [ "$num" = "<not" ]; then
@@ -45,7 +45,7 @@ test_no_aggr()
 {
        perf stat -a -A --no-big-num -e cycles,instructions sleep 1  2>&1 | \
        grep ^CPU | \
-       while read cpu num evt hash ipc rest
+       while read cpu num evt _hash ipc rest
        do
                # skip not counted events
                if [ "$num" = "<not" ]; then
index f972b31..fb2b105 100755 (executable)
@@ -6,7 +6,7 @@
 
 set -e
 
-. $(dirname $0)/lib/stat_output.sh
+. "$(dirname $0)"/lib/stat_output.sh
 
 stat_output=$(mktemp /tmp/__perf_test.stat_output.std.XXXXX)
 
@@ -28,7 +28,6 @@ trap trap_cleanup EXIT TERM INT
 
 function commachecker()
 {
-       local -i cnt=0
        local prefix=1
 
        case "$1"
index 13473ae..513cd1e 100755 (executable)
@@ -31,12 +31,12 @@ if ! perf stat --bpf-counters true > /dev/null 2>&1; then
 fi
 
 base_cycles=$(perf stat --no-big-num -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$base_cycles" == "<not" ]; then
+if [ "$base_cycles" = "<not" ]; then
        echo "Skipping: cycles event not counted"
        exit 2
 fi
 bpf_cycles=$(perf stat --no-big-num --bpf-counters -e cycles -- perf bench sched messaging -g 1 -l 100 -t 2>&1 | awk '/cycles/ {print $1}')
-if [ "$bpf_cycles" == "<not" ]; then
+if [ "$bpf_cycles" = "<not" ]; then
        echo "Failed: cycles not counted with --bpf-counters"
        exit 1
 fi
index d724855..a74440a 100755 (executable)
@@ -25,22 +25,22 @@ check_bpf_counter()
 find_cgroups()
 {
        # try usual systemd slices first
-       if [ -d /sys/fs/cgroup/system.slice -a -d /sys/fs/cgroup/user.slice ]; then
+       if [ -d /sys/fs/cgroup/system.slice ] && [ -d /sys/fs/cgroup/user.slice ]; then
                test_cgroups="system.slice,user.slice"
                return
        fi
 
        # try root and self cgroups
-       local self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
-       if [ -z ${self_cgrp} ]; then
+       find_cgroups_self_cgrp=$(grep perf_event /proc/self/cgroup | cut -d: -f3)
+       if [ -z "${find_cgroups_self_cgrp}" ]; then
                # cgroup v2 doesn't specify perf_event
-               self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
+               find_cgroups_self_cgrp=$(grep ^0: /proc/self/cgroup | cut -d: -f3)
        fi
 
-       if [ -z ${self_cgrp} ]; then
+       if [ -z "${find_cgroups_self_cgrp}" ]; then
                test_cgroups="/"
        else
-               test_cgroups="/,${self_cgrp}"
+               test_cgroups="/,${find_cgroups_self_cgrp}"
        fi
 }
 
@@ -48,13 +48,11 @@ find_cgroups()
 # Just check if it runs without failure and has non-zero results.
 check_system_wide_counted()
 {
-       local output
-
-       output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1  2>&1)
-       if echo ${output} | grep -q -F "<not "; then
+       check_system_wide_counted_output=$(perf stat -a --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, sleep 1  2>&1)
+       if echo ${check_system_wide_counted_output} | grep -q -F "<not "; then
                echo "Some system-wide events are not counted"
                if [ "${verbose}" = "1" ]; then
-                       echo ${output}
+                       echo ${check_system_wide_counted_output}
                fi
                exit 1
        fi
@@ -62,13 +60,11 @@ check_system_wide_counted()
 
 check_cpu_list_counted()
 {
-       local output
-
-       output=$(perf stat -C 1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1  2>&1)
-       if echo ${output} | grep -q -F "<not "; then
+       check_cpu_list_counted_output=$(perf stat -C 1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1  2>&1)
+       if echo ${check_cpu_list_counted_output} | grep -q -F "<not "; then
                echo "Some CPU events are not counted"
                if [ "${verbose}" = "1" ]; then
-                       echo ${output}
+                       echo ${check_cpu_list_counted_output}
                fi
                exit 1
        fi
index fad3616..1a7e6a8 100755 (executable)
@@ -22,7 +22,7 @@ cleanup_files()
        rm -f ${PERF_DATA}
 }
 
-trap cleanup_files exit term int
+trap cleanup_files EXIT TERM INT
 
 echo "Recording workload..."
 perf record -o ${PERF_DATA} -e arm_spe/period=65536/ -vvv -- $TEST_PROGRAM > ${PERF_RECORD_LOG} 2>&1 &
index 72ac6c8..6ded58f 100755 (executable)
@@ -39,7 +39,7 @@ test_json_converter_command()
        echo "Testing Perf Data Convertion Command to JSON"
        perf record -o "$perfdata" -F 99 -g -- perf test -w noploop > /dev/null 2>&1
        perf data convert --to-json "$result" --force -i "$perfdata" >/dev/null 2>&1
-       if [ $(cat "${result}" | wc -l) -gt "0" ] ; then
+       if [ "$(cat ${result} | wc -l)" -gt "0" ] ; then
                echo "Perf Data Converter Command to JSON [SUCCESS]"
        else
                echo "Perf Data Converter Command to JSON [FAILED]"
index 0095abb..92d1515 100755 (executable)
@@ -52,7 +52,7 @@ find_str_or_fail() {
 
 # check if perf is compiled with libtraceevent support
 skip_no_probe_record_support() {
-       perf record -e "sched:sched_switch" -a -- sleep 1 2>&1 | grep "libtraceevent is necessary for tracepoint support" && return 2
+       perf version --build-options | grep -q " OFF .* HAVE_LIBTRACEEVENT" && return 2
        return 0
 }
 
index 0a4bac3..db2ff14 100755 (executable)
 # SPDX-License-Identifier: GPL-2.0
 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
 
-. $(dirname $0)/lib/probe.sh
+. "$(dirname $0)"/lib/probe.sh
 
 skip_if_no_perf_probe || exit 2
 skip_if_no_perf_trace || exit 2
 
-. $(dirname $0)/lib/probe_vfs_getname.sh
+. "$(dirname $0)"/lib/probe_vfs_getname.sh
 
 trace_open_vfs_getname() {
-       evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+       evts="$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')"
        perf trace -e $evts touch $file 2>&1 | \
        grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }
index 5009740..706780f 100644 (file)
@@ -27,7 +27,7 @@ static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
                                     struct machine *machine __maybe_unused)
 {
        struct perf_record_stat_config *config = &event->stat_config;
-       struct perf_stat_config stat_config;
+       struct perf_stat_config stat_config = {};
 
 #define HAS(term, val) \
        has_term(config, PERF_STAT_CONFIG_TERM__##term, val)
index 37c53ba..cc09dca 100755 (executable)
@@ -17,8 +17,7 @@ arch_string()
 
 asm_errno_file()
 {
-       local arch="$1"
-       local header
+       arch="$1"
 
        header="$toolsdir/arch/$arch/include/uapi/asm/errno.h"
        if test -r "$header"; then
@@ -30,8 +29,7 @@ asm_errno_file()
 
 create_errno_lookup_func()
 {
-       local arch=$(arch_string "$1")
-       local nr name
+       arch=$(arch_string "$1")
 
        printf "static const char *errno_to_name__%s(int err)\n{\n\tswitch (err) {\n" $arch
 
@@ -44,8 +42,8 @@ create_errno_lookup_func()
 
 process_arch()
 {
-       local arch="$1"
-       local asm_errno=$(asm_errno_file "$arch")
+       arch="$1"
+       asm_errno=$(asm_errno_file "$arch")
 
        $gcc $CFLAGS $include_path -E -dM -x c $asm_errno \
                |grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \
@@ -56,9 +54,8 @@ process_arch()
 
 create_arch_errno_table_func()
 {
-       local archlist="$1"
-       local default="$2"
-       local arch
+       archlist="$1"
+       default="$2"
 
        printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n'
        printf '{\n'
index fd5c740..b1596df 100755 (executable)
@@ -7,9 +7,9 @@
 prctl_arch_header=${x86_header_dir}/prctl.h
 
 print_range () {
-       local idx=$1
-       local prefix=$2
-       local first_entry=$3
+       idx=$1
+       prefix=$2
+       first_entry=$3
 
        printf "#define x86_arch_prctl_codes_%d_offset %s\n" $idx $first_entry
        printf "static const char *x86_arch_prctl_codes_%d[] = {\n" $idx
index c7ad9e0..70db5a7 100644 (file)
@@ -407,11 +407,6 @@ static bool hist_browser__selection_has_children(struct hist_browser *browser)
        return container_of(ms, struct callchain_list, ms)->has_children;
 }
 
-static bool hist_browser__he_selection_unfolded(struct hist_browser *browser)
-{
-       return browser->he_selection ? browser->he_selection->unfolded : false;
-}
-
 static bool hist_browser__selection_unfolded(struct hist_browser *browser)
 {
        struct hist_entry *he = browser->he_selection;
@@ -584,8 +579,8 @@ static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
        return n;
 }
 
-static void __hist_entry__set_folding(struct hist_entry *he,
-                                     struct hist_browser *hb, bool unfold)
+static void hist_entry__set_folding(struct hist_entry *he,
+                                   struct hist_browser *hb, bool unfold)
 {
        hist_entry__init_have_children(he);
        he->unfolded = unfold ? he->has_children : false;
@@ -603,34 +598,12 @@ static void __hist_entry__set_folding(struct hist_entry *he,
                he->nr_rows = 0;
 }
 
-static void hist_entry__set_folding(struct hist_entry *he,
-                                   struct hist_browser *browser, bool unfold)
-{
-       double percent;
-
-       percent = hist_entry__get_percent_limit(he);
-       if (he->filtered || percent < browser->min_pcnt)
-               return;
-
-       __hist_entry__set_folding(he, browser, unfold);
-
-       if (!he->depth || unfold)
-               browser->nr_hierarchy_entries++;
-       if (he->leaf)
-               browser->nr_callchain_rows += he->nr_rows;
-       else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
-               browser->nr_hierarchy_entries++;
-               he->has_no_entry = true;
-               he->nr_rows = 1;
-       } else
-               he->has_no_entry = false;
-}
-
 static void
 __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
 {
        struct rb_node *nd;
        struct hist_entry *he;
+       double percent;
 
        nd = rb_first_cached(&browser->hists->entries);
        while (nd) {
@@ -640,6 +613,21 @@ __hist_browser__set_folding(struct hist_browser *browser, bool unfold)
                nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
 
                hist_entry__set_folding(he, browser, unfold);
+
+               percent = hist_entry__get_percent_limit(he);
+               if (he->filtered || percent < browser->min_pcnt)
+                       continue;
+
+               if (!he->depth || unfold)
+                       browser->nr_hierarchy_entries++;
+               if (he->leaf)
+                       browser->nr_callchain_rows += he->nr_rows;
+               else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+                       browser->nr_hierarchy_entries++;
+                       he->has_no_entry = true;
+                       he->nr_rows = 1;
+               } else
+                       he->has_no_entry = false;
        }
 }
 
@@ -659,8 +647,10 @@ static void hist_browser__set_folding_selected(struct hist_browser *browser, boo
        if (!browser->he_selection)
                return;
 
-       hist_entry__set_folding(browser->he_selection, browser, unfold);
-       browser->b.nr_entries = hist_browser__nr_entries(browser);
+       if (unfold == browser->he_selection->unfolded)
+               return;
+
+       hist_browser__toggle_fold(browser);
 }
 
 static void ui_browser__warn_lost_events(struct ui_browser *browser)
@@ -732,8 +722,8 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l
                hist_browser__set_folding(browser, true);
                break;
        case 'e':
-               /* Expand the selected entry. */
-               hist_browser__set_folding_selected(browser, !hist_browser__he_selection_unfolded(browser));
+               /* Toggle expand/collapse the selected entry. */
+               hist_browser__toggle_fold(browser);
                break;
        case 'H':
                browser->show_headers = !browser->show_headers;
@@ -1779,7 +1769,7 @@ static void hists_browser__hierarchy_headers(struct hist_browser *browser)
        hists_browser__scnprintf_hierarchy_headers(browser, headers,
                                                   sizeof(headers));
 
-       ui_browser__gotorc(&browser->b, 0, 0);
+       ui_browser__gotorc_title(&browser->b, 0, 0);
        ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
        ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
 }
index 96f4ea1..d487aec 100644 (file)
@@ -1,3 +1,5 @@
+include $(srctree)/tools/scripts/utilities.mak
+
 perf-y += arm64-frame-pointer-unwind-support.o
 perf-y += addr_location.o
 perf-y += annotate.o
@@ -246,7 +248,7 @@ $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-
 
 $(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y
        $(call rule_mkdir)
-       $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
+       $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) $(BISON_FALLBACK_FLAGS) \
                -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
 
 $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c
@@ -279,28 +281,48 @@ $(OUTPUT)util/bpf-filter-bison.c $(OUTPUT)util/bpf-filter-bison.h: util/bpf-filt
        $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) $(BISON_FILE_PREFIX_MAP) \
                -o $(OUTPUT)util/bpf-filter-bison.c -p perf_bpf_filter_
 
-FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e  's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26)
-ifeq ($(FLEX_GE_26),1)
-  flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations
-  CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?)
-  ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1)
-    flex_flags += -Wno-misleading-indentation
+FLEX_VERSION := $(shell $(FLEX) --version | cut -d' ' -f2)
+
+FLEX_GE_260 := $(call version-ge3,$(FLEX_VERSION),2.6.0)
+ifeq ($(FLEX_GE_260),1)
+  flex_flags := -Wno-redundant-decls -Wno-switch-default -Wno-unused-function -Wno-misleading-indentation
+
+  # Some newer clang and gcc version complain about this
+  # util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable]
+  #  int yynerrs = 0;
+
+  flex_flags += -Wno-unused-but-set-variable
+
+  FLEX_LT_262 := $(call version-lt3,$(FLEX_VERSION),2.6.2)
+  ifeq ($(FLEX_LT_262),1)
+    flex_flags += -Wno-sign-compare
   endif
 else
   flex_flags := -w
 endif
+
 CFLAGS_parse-events-flex.o  += $(flex_flags)
 CFLAGS_pmu-flex.o           += $(flex_flags)
 CFLAGS_expr-flex.o          += $(flex_flags)
 CFLAGS_bpf-filter-flex.o    += $(flex_flags)
 
-bison_flags := -DYYENABLE_NLS=0
-BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
-ifeq ($(BISON_GE_35),1)
-  bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum -Wno-unused-but-set-variable -Wno-unknown-warning-option
+# Some newer clang and gcc version complain about this
+# util/parse-events-bison.c:1317:9: error: variable 'parse_events_nerrs' set but not used [-Werror,-Wunused-but-set-variable]
+#  int yynerrs = 0;
+
+bison_flags := -DYYENABLE_NLS=0 -Wno-unused-but-set-variable
+BISON_GE_382 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \>\= 382)
+ifeq ($(BISON_GE_382),1)
+  bison_flags += -Wno-switch-enum
 else
   bison_flags += -w
 endif
+
+BISON_LT_381 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\).\([0-9]\+\)/\1\2\3/g') \< 381)
+ifeq ($(BISON_LT_381),1)
+  bison_flags += -DYYNOMEM=YYABORT
+endif
+
 CFLAGS_parse-events-bison.o += $(bison_flags)
 CFLAGS_pmu-bison.o          += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
 CFLAGS_expr-bison.o         += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
@@ -316,8 +338,6 @@ CFLAGS_find_bit.o      += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
 CFLAGS_rbtree.o        += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_libstring.o     += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
 CFLAGS_hweight.o       += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
-CFLAGS_parse-events.o  += -Wno-redundant-decls
-CFLAGS_expr.o          += -Wno-redundant-decls
 CFLAGS_header.o        += -include $(OUTPUT)PERF-VERSION-FILE
 CFLAGS_arm-spe.o       += -I$(srctree)/tools/arch/arm64/include/
 
index ba988a1..82956ad 100644 (file)
@@ -1846,8 +1846,11 @@ static int symbol__disassemble_bpf(struct symbol *sym,
        perf_exe(tpath, sizeof(tpath));
 
        bfdf = bfd_openr(tpath, NULL);
-       assert(bfdf);
-       assert(bfd_check_format(bfdf, bfd_object));
+       if (bfdf == NULL)
+               abort();
+
+       if (!bfd_check_format(bfdf, bfd_object))
+               abort();
 
        s = open_memstream(&buf, &buf_size);
        if (!s) {
@@ -1895,7 +1898,8 @@ static int symbol__disassemble_bpf(struct symbol *sym,
 #else
        disassemble = disassembler(bfdf);
 #endif
-       assert(disassemble);
+       if (disassemble == NULL)
+               abort();
 
        fflush(s);
        do {
index 0b30688..47f01df 100644 (file)
@@ -9,8 +9,8 @@
 #include "util/evsel.h"
 
 #include "util/bpf-filter.h"
-#include "util/bpf-filter-flex.h"
-#include "util/bpf-filter-bison.h"
+#include <util/bpf-filter-flex.h>
+#include <util/bpf-filter-bison.h>
 
 #include "bpf_skel/sample-filter.h"
 #include "bpf_skel/sample_filter.skel.h"
index 07d6c79..5dfa948 100644 (file)
@@ -9,6 +9,8 @@
 #include <linux/list.h>
 #include "bpf-filter.h"
 
+int perf_bpf_filter_lex(void);
+
 static void perf_bpf_filter_error(struct list_head *expr __maybe_unused,
                                  char const *msg)
 {
index 44cde27..50e4269 100644 (file)
@@ -32,9 +32,6 @@
 
 #include <internal/xyarray.h>
 
-/* temporarily disable libbpf deprecation warnings */
-#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
-
 static int libbpf_perf_print(enum libbpf_print_level level __attribute__((unused)),
                              const char *fmt, va_list args)
 {
@@ -1091,7 +1088,6 @@ enum bpf_map_op_type {
 
 enum bpf_map_key_type {
        BPF_MAP_KEY_ALL,
-       BPF_MAP_KEY_RANGES,
 };
 
 struct bpf_map_op {
@@ -1099,9 +1095,6 @@ struct bpf_map_op {
        enum bpf_map_op_type op_type;
        enum bpf_map_key_type key_type;
        union {
-               struct parse_events_array array;
-       } k;
-       union {
                u64 value;
                struct evsel *evsel;
        } v;
@@ -1116,8 +1109,6 @@ bpf_map_op__delete(struct bpf_map_op *op)
 {
        if (!list_empty(&op->list))
                list_del_init(&op->list);
-       if (op->key_type == BPF_MAP_KEY_RANGES)
-               parse_events__clear_array(&op->k.array);
        free(op);
 }
 
@@ -1196,18 +1187,6 @@ bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
        if (!term)
                return 0;
 
-       if (term->array.nr_ranges) {
-               size_t memsz = term->array.nr_ranges *
-                               sizeof(op->k.array.ranges[0]);
-
-               op->k.array.ranges = memdup(term->array.ranges, memsz);
-               if (!op->k.array.ranges) {
-                       pr_debug("Not enough memory to alloc indices for map\n");
-                       return -ENOMEM;
-               }
-               op->key_type = BPF_MAP_KEY_RANGES;
-               op->k.array.nr_ranges = term->array.nr_ranges;
-       }
        return 0;
 }
 
@@ -1244,18 +1223,6 @@ bpf_map_op__clone(struct bpf_map_op *op)
        }
 
        INIT_LIST_HEAD(&newop->list);
-       if (op->key_type == BPF_MAP_KEY_RANGES) {
-               size_t memsz = op->k.array.nr_ranges *
-                              sizeof(op->k.array.ranges[0]);
-
-               newop->k.array.ranges = memdup(op->k.array.ranges, memsz);
-               if (!newop->k.array.ranges) {
-                       pr_debug("Failed to alloc indices for map\n");
-                       free(newop);
-                       return NULL;
-               }
-       }
-
        return newop;
 }
 
@@ -1457,40 +1424,6 @@ struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
 };
 
 static int
-config_map_indices_range_check(struct parse_events_term *term,
-                              struct bpf_map *map,
-                              const char *map_name)
-{
-       struct parse_events_array *array = &term->array;
-       unsigned int i;
-
-       if (!array->nr_ranges)
-               return 0;
-       if (!array->ranges) {
-               pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
-                        map_name, (int)array->nr_ranges);
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-
-       if (!map) {
-               pr_debug("Map '%s' is invalid\n", map_name);
-               return -BPF_LOADER_ERRNO__INTERNAL;
-       }
-
-       for (i = 0; i < array->nr_ranges; i++) {
-               unsigned int start = array->ranges[i].start;
-               size_t length = array->ranges[i].length;
-               unsigned int idx = start + length - 1;
-
-               if (idx >= bpf_map__max_entries(map)) {
-                       pr_debug("ERROR: index %d too large\n", idx);
-                       return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
-               }
-       }
-       return 0;
-}
-
-static int
 bpf__obj_config_map(struct bpf_object *obj,
                    struct parse_events_term *term,
                    struct evlist *evlist,
@@ -1525,12 +1458,6 @@ bpf__obj_config_map(struct bpf_object *obj,
                goto out;
        }
 
-       *key_scan_pos += strlen(map_opt);
-       err = config_map_indices_range_check(term, map, map_name);
-       if (err)
-               goto out;
-       *key_scan_pos -= strlen(map_opt);
-
        for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
                struct bpf_obj_config__map_func *func =
                                &bpf_obj_config__map_funcs[i];
@@ -1579,7 +1506,6 @@ typedef int (*map_config_func_t)(const char *name, int map_fd,
                                 const struct bpf_map *map,
                                 struct bpf_map_op *op,
                                 void *pkey, void *arg);
-
 static int
 foreach_key_array_all(map_config_func_t func,
                      void *arg, const char *name,
@@ -1600,32 +1526,6 @@ foreach_key_array_all(map_config_func_t func,
        return 0;
 }
 
-static int
-foreach_key_array_ranges(map_config_func_t func, void *arg,
-                        const char *name, int map_fd,
-                        const struct bpf_map *map,
-                        struct bpf_map_op *op)
-{
-       unsigned int i, j;
-       int err;
-
-       for (i = 0; i < op->k.array.nr_ranges; i++) {
-               unsigned int start = op->k.array.ranges[i].start;
-               size_t length = op->k.array.ranges[i].length;
-
-               for (j = 0; j < length; j++) {
-                       unsigned int idx = start + j;
-
-                       err = func(name, map_fd, map, op, &idx, arg);
-                       if (err) {
-                               pr_debug("ERROR: failed to insert value to %s[%u]\n",
-                                        name, idx);
-                               return err;
-                       }
-               }
-       }
-       return 0;
-}
 
 static int
 bpf_map_config_foreach_key(struct bpf_map *map,
@@ -1666,10 +1566,6 @@ bpf_map_config_foreach_key(struct bpf_map *map,
                                err = foreach_key_array_all(func, arg, name,
                                                            map_fd, map, op);
                                break;
-                       case BPF_MAP_KEY_RANGES:
-                               err = foreach_key_array_ranges(func, arg, name,
-                                                              map_fd, map, op);
-                               break;
                        default:
                                pr_debug("ERROR: keytype for map '%s' invalid\n",
                                         name);
diff --git a/tools/perf/util/bpf_skel/bench_uprobe.bpf.c b/tools/perf/util/bpf_skel/bench_uprobe.bpf.c
new file mode 100644 (file)
index 0000000..2c55896
--- /dev/null
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2023 Red Hat
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+unsigned int nr_uprobes;
+
+SEC("uprobe")
+int BPF_UPROBE(empty)
+{
+       return 0;
+}
+
+SEC("uprobe")
+int BPF_UPROBE(trace_printk)
+{
+       char fmt[] = "perf bench uprobe %u";
+
+       bpf_trace_printk(fmt, sizeof(fmt), ++nr_uprobes);
+       return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
index 613ecfd..8610d03 100644 (file)
@@ -1,2 +1,5 @@
 perf-$(CONFIG_CLANGLLVM) += clang.o
 perf-$(CONFIG_CLANGLLVM) += clang-test.o
+
+CXXFLAGS_clang.o += -Wno-unused-parameter
+CXXFLAGS_clang-test.o += -Wno-unused-parameter
index 4cbb092..923c0fb 100644 (file)
@@ -93,8 +93,8 @@ struct process_symbol_args {
        u64        start;
 };
 
-static int find_symbol_cb(void *arg, const char *name, char type,
-                         u64 start)
+static int find_func_symbol_cb(void *arg, const char *name, char type,
+                              u64 start)
 {
        struct process_symbol_args *args = arg;
 
@@ -110,12 +110,36 @@ static int find_symbol_cb(void *arg, const char *name, char type,
        return 1;
 }
 
+static int find_any_symbol_cb(void *arg, const char *name,
+                             char type __maybe_unused, u64 start)
+{
+       struct process_symbol_args *args = arg;
+
+       if (strcmp(name, args->name))
+               return 0;
+
+       args->start = start;
+       return 1;
+}
+
 int kallsyms__get_function_start(const char *kallsyms_filename,
                                 const char *symbol_name, u64 *addr)
 {
        struct process_symbol_args args = { .name = symbol_name, };
 
-       if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0)
+       if (kallsyms__parse(kallsyms_filename, &args, find_func_symbol_cb) <= 0)
+               return -1;
+
+       *addr = args.start;
+       return 0;
+}
+
+int kallsyms__get_symbol_start(const char *kallsyms_filename,
+                              const char *symbol_name, u64 *addr)
+{
+       struct process_symbol_args args = { .name = symbol_name, };
+
+       if (kallsyms__parse(kallsyms_filename, &args, find_any_symbol_cb) <= 0)
                return -1;
 
        *addr = args.start;
index de20e01..d8bcee2 100644 (file)
@@ -360,6 +360,8 @@ size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FIL
 
 int kallsyms__get_function_start(const char *kallsyms_filename,
                                 const char *symbol_name, u64 *addr);
+int kallsyms__get_symbol_start(const char *kallsyms_filename,
+                              const char *symbol_name, u64 *addr);
 
 void event_attr_init(struct perf_event_attr *attr);
 
index 762e2b2..e41bc4d 100644 (file)
@@ -1474,6 +1474,7 @@ void evsel__exit(struct evsel *evsel)
        perf_thread_map__put(evsel->core.threads);
        zfree(&evsel->group_name);
        zfree(&evsel->name);
+       zfree(&evsel->filter);
        zfree(&evsel->pmu_name);
        zfree(&evsel->group_pmu_name);
        zfree(&evsel->unit);
index 4814262..7410a16 100644 (file)
@@ -10,8 +10,8 @@
 #include "debug.h"
 #include "evlist.h"
 #include "expr.h"
-#include "expr-bison.h"
-#include "expr-flex.h"
+#include <util/expr-bison.h>
+#include <util/expr-flex.h>
 #include "util/hashmap.h"
 #include "smt.h"
 #include "tsc.h"
index dd504af..65d54a6 100644 (file)
@@ -7,6 +7,8 @@
 #include "util/debug.h"
 #define IN_EXPR_Y 1
 #include "expr.h"
+#include "expr-bison.h"
+int expr_lex(YYSTYPE * yylval_param , void *yyscanner);
 %}
 
 %define api.pure full
@@ -56,7 +58,7 @@
 static void expr_error(double *final_val __maybe_unused,
                       struct expr_parse_ctx *ctx __maybe_unused,
                       bool compute_ids __maybe_unused,
-                      void *scanner,
+                      void *scanner __maybe_unused,
                       const char *s)
 {
        pr_debug("%s\n", s);
index 4e62843..11de3ca 100644 (file)
@@ -1216,7 +1216,9 @@ static int machine__get_running_kernel_start(struct machine *machine,
 
        *start = addr;
 
-       err = kallsyms__get_function_start(filename, "_etext", &addr);
+       err = kallsyms__get_symbol_start(filename, "_edata", &addr);
+       if (err)
+               err = kallsyms__get_function_start(filename, "_etext", &addr);
        if (!err)
                *end = addr;
 
index c9ec0ca..0b5075e 100644 (file)
@@ -18,8 +18,8 @@
 #include "debug.h"
 #include <api/fs/tracing_path.h>
 #include <perf/cpumap.h>
-#include "parse-events-bison.h"
-#include "parse-events-flex.h"
+#include <util/parse-events-bison.h>
+#include <util/parse-events-flex.h>
 #include "pmu.h"
 #include "pmus.h"
 #include "asm/bug.h"
@@ -35,7 +35,6 @@
 #ifdef PARSER_DEBUG
 extern int parse_events_debug;
 #endif
-int parse_events_parse(void *parse_state, void *scanner);
 static int get_config_terms(struct list_head *head_config,
                            struct list_head *head_terms __maybe_unused);
 
@@ -499,7 +498,7 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
 
 #ifdef HAVE_LIBTRACEEVENT
 static void tracepoint_error(struct parse_events_error *e, int err,
-                            const char *sys, const char *name)
+                            const char *sys, const char *name, int column)
 {
        const char *str;
        char help[BUFSIZ];
@@ -526,18 +525,19 @@ static void tracepoint_error(struct parse_events_error *e, int err,
        }
 
        tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name);
-       parse_events_error__handle(e, 0, strdup(str), strdup(help));
+       parse_events_error__handle(e, column, strdup(str), strdup(help));
 }
 
 static int add_tracepoint(struct list_head *list, int *idx,
                          const char *sys_name, const char *evt_name,
                          struct parse_events_error *err,
-                         struct list_head *head_config)
+                         struct list_head *head_config, void *loc_)
 {
+       YYLTYPE *loc = loc_;
        struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++);
 
        if (IS_ERR(evsel)) {
-               tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name);
+               tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name, loc->first_column);
                return PTR_ERR(evsel);
        }
 
@@ -556,7 +556,7 @@ static int add_tracepoint(struct list_head *list, int *idx,
 static int add_tracepoint_multi_event(struct list_head *list, int *idx,
                                      const char *sys_name, const char *evt_name,
                                      struct parse_events_error *err,
-                                     struct list_head *head_config)
+                                     struct list_head *head_config, YYLTYPE *loc)
 {
        char *evt_path;
        struct dirent *evt_ent;
@@ -565,13 +565,13 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 
        evt_path = get_events_file(sys_name);
        if (!evt_path) {
-               tracepoint_error(err, errno, sys_name, evt_name);
+               tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
                return -1;
        }
        evt_dir = opendir(evt_path);
        if (!evt_dir) {
                put_events_file(evt_path);
-               tracepoint_error(err, errno, sys_name, evt_name);
+               tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
                return -1;
        }
 
@@ -588,11 +588,11 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
                found++;
 
                ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name,
-                                    err, head_config);
+                                    err, head_config, loc);
        }
 
        if (!found) {
-               tracepoint_error(err, ENOENT, sys_name, evt_name);
+               tracepoint_error(err, ENOENT, sys_name, evt_name, loc->first_column);
                ret = -1;
        }
 
@@ -604,19 +604,19 @@ static int add_tracepoint_multi_event(struct list_head *list, int *idx,
 static int add_tracepoint_event(struct list_head *list, int *idx,
                                const char *sys_name, const char *evt_name,
                                struct parse_events_error *err,
-                               struct list_head *head_config)
+                               struct list_head *head_config, YYLTYPE *loc)
 {
        return strpbrk(evt_name, "*?") ?
-              add_tracepoint_multi_event(list, idx, sys_name, evt_name,
-                                         err, head_config) :
-              add_tracepoint(list, idx, sys_name, evt_name,
-                             err, head_config);
+               add_tracepoint_multi_event(list, idx, sys_name, evt_name,
+                                          err, head_config, loc) :
+               add_tracepoint(list, idx, sys_name, evt_name,
+                              err, head_config, loc);
 }
 
 static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
                                    const char *sys_name, const char *evt_name,
                                    struct parse_events_error *err,
-                                   struct list_head *head_config)
+                                   struct list_head *head_config, YYLTYPE *loc)
 {
        struct dirent *events_ent;
        DIR *events_dir;
@@ -624,7 +624,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 
        events_dir = tracing_events__opendir();
        if (!events_dir) {
-               tracepoint_error(err, errno, sys_name, evt_name);
+               tracepoint_error(err, errno, sys_name, evt_name, loc->first_column);
                return -1;
        }
 
@@ -640,7 +640,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
                        continue;
 
                ret = add_tracepoint_event(list, idx, events_ent->d_name,
-                                          evt_name, err, head_config);
+                                          evt_name, err, head_config, loc);
        }
 
        closedir(events_dir);
@@ -653,6 +653,7 @@ struct __add_bpf_event_param {
        struct parse_events_state *parse_state;
        struct list_head *list;
        struct list_head *head_config;
+       YYLTYPE *loc;
 };
 
 static int add_bpf_event(const char *group, const char *event, int fd, struct bpf_object *obj,
@@ -679,7 +680,7 @@ static int add_bpf_event(const char *group, const char *event, int fd, struct bp
 
        err = parse_events_add_tracepoint(&new_evsels, &parse_state->idx, group,
                                          event, parse_state->error,
-                                         param->head_config);
+                                         param->head_config, param->loc);
        if (err) {
                struct evsel *evsel, *tmp;
 
@@ -706,12 +707,14 @@ static int add_bpf_event(const char *group, const char *event, int fd, struct bp
 int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
                              struct list_head *list,
                              struct bpf_object *obj,
-                             struct list_head *head_config)
+                             struct list_head *head_config,
+                             void *loc)
 {
        int err;
        char errbuf[BUFSIZ];
-       struct __add_bpf_event_param param = {parse_state, list, head_config};
+       struct __add_bpf_event_param param = {parse_state, list, head_config, loc};
        static bool registered_unprobe_atexit = false;
+       YYLTYPE test_loc = {.first_column = -1};
 
        if (IS_ERR(obj) || !obj) {
                snprintf(errbuf, sizeof(errbuf),
@@ -742,6 +745,9 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
                goto errout;
        }
 
+       if (!param.loc)
+               param.loc = &test_loc;
+
        err = bpf__foreach_event(obj, add_bpf_event, &param);
        if (err) {
                snprintf(errbuf, sizeof(errbuf),
@@ -751,7 +757,7 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
 
        return 0;
 errout:
-       parse_events_error__handle(parse_state->error, 0,
+       parse_events_error__handle(parse_state->error, param.loc ? param.loc->first_column : 0,
                                strdup(errbuf), strdup("(add -v to see detail)"));
        return err;
 }
@@ -762,7 +768,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state,
                        struct list_head *head_config)
 {
        struct parse_events_term *term;
-       int error_pos;
+       int error_pos = 0;
 
        if (!head_config || list_empty(head_config))
                return 0;
@@ -793,13 +799,7 @@ parse_events_config_bpf(struct parse_events_state *parse_state,
 
                        parse_events_error__handle(parse_state->error, idx,
                                                strdup(errbuf),
-                                               strdup(
-"Hint:\tValid config terms:\n"
-"     \tmap:[<arraymap>].value<indices>=[value]\n"
-"     \tmap:[<eventmap>].event<indices>=[event]\n"
-"\n"
-"     \twhere <indices> is something like [0,3...5] or [all]\n"
-"     \t(add -v to see detail)"));
+                                               NULL);
                        return err;
                }
        }
@@ -839,11 +839,13 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
                          struct list_head *list,
                          char *bpf_file_name,
                          bool source,
-                         struct list_head *head_config)
+                         struct list_head *head_config,
+                         void *loc_)
 {
        int err;
        struct bpf_object *obj;
        LIST_HEAD(obj_head_config);
+       YYLTYPE *loc = loc_;
 
        if (head_config)
                split_bpf_config_terms(head_config, &obj_head_config);
@@ -863,12 +865,12 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
                                                   -err, errbuf,
                                                   sizeof(errbuf));
 
-               parse_events_error__handle(parse_state->error, 0,
+               parse_events_error__handle(parse_state->error, loc->first_column,
                                        strdup(errbuf), strdup("(add -v to see detail)"));
                return err;
        }
 
-       err = parse_events_load_bpf_obj(parse_state, list, obj, head_config);
+       err = parse_events_load_bpf_obj(parse_state, list, obj, head_config, loc);
        if (err)
                return err;
        err = parse_events_config_bpf(parse_state, obj, &obj_head_config);
@@ -885,9 +887,12 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
 int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
                              struct list_head *list __maybe_unused,
                              struct bpf_object *obj __maybe_unused,
-                             struct list_head *head_config __maybe_unused)
+                             struct list_head *head_config __maybe_unused,
+                             void *loc_)
 {
-       parse_events_error__handle(parse_state->error, 0,
+       YYLTYPE *loc = loc_;
+
+       parse_events_error__handle(parse_state->error, loc->first_column,
                                   strdup("BPF support is not compiled"),
                                   strdup("Make sure libbpf-devel is available at build time."));
        return -ENOTSUP;
@@ -897,9 +902,12 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
                          struct list_head *list __maybe_unused,
                          char *bpf_file_name __maybe_unused,
                          bool source __maybe_unused,
-                         struct list_head *head_config __maybe_unused)
+                         struct list_head *head_config __maybe_unused,
+                         void *loc_)
 {
-       parse_events_error__handle(parse_state->error, 0,
+       YYLTYPE *loc = loc_;
+
+       parse_events_error__handle(parse_state->error, loc->first_column,
                                   strdup("BPF support is not compiled"),
                                   strdup("Make sure libbpf-devel is available at build time."));
        return -ENOTSUP;
@@ -1441,8 +1449,9 @@ static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
 int parse_events_add_tracepoint(struct list_head *list, int *idx,
                                const char *sys, const char *event,
                                struct parse_events_error *err,
-                               struct list_head *head_config)
+                               struct list_head *head_config, void *loc_)
 {
+       YYLTYPE *loc = loc_;
 #ifdef HAVE_LIBTRACEEVENT
        if (head_config) {
                struct perf_event_attr attr;
@@ -1454,17 +1463,17 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
 
        if (strpbrk(sys, "*?"))
                return add_tracepoint_multi_sys(list, idx, sys, event,
-                                               err, head_config);
+                                               err, head_config, loc);
        else
                return add_tracepoint_event(list, idx, sys, event,
-                                           err, head_config);
+                                           err, head_config, loc);
 #else
        (void)list;
        (void)idx;
        (void)sys;
        (void)event;
        (void)head_config;
-       parse_events_error__handle(err, 0, strdup("unsupported tracepoint"),
+       parse_events_error__handle(err, loc->first_column, strdup("unsupported tracepoint"),
                                strdup("libtraceevent is necessary for tracepoint support"));
        return -1;
 #endif
@@ -1559,13 +1568,14 @@ static bool config_term_percore(struct list_head *config_terms)
 int parse_events_add_pmu(struct parse_events_state *parse_state,
                         struct list_head *list, char *name,
                         struct list_head *head_config,
-                        bool auto_merge_stats)
+                        bool auto_merge_stats, void *loc_)
 {
        struct perf_event_attr attr;
        struct perf_pmu_info info;
        struct perf_pmu *pmu;
        struct evsel *evsel;
        struct parse_events_error *err = parse_state->error;
+       YYLTYPE *loc = loc_;
        LIST_HEAD(config_terms);
 
        pmu = parse_state->fake_pmu ?: perf_pmus__find(name);
@@ -1589,7 +1599,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
                if (asprintf(&err_str,
                                "Cannot find PMU `%s'. Missing kernel support?",
                                name) >= 0)
-                       parse_events_error__handle(err, 0, err_str, NULL);
+                       parse_events_error__handle(err, loc->first_column, err_str, NULL);
                return -EINVAL;
        }
        if (head_config)
@@ -1675,12 +1685,13 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
 
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
                               char *str, struct list_head *head,
-                              struct list_head **listp)
+                              struct list_head **listp, void *loc_)
 {
        struct parse_events_term *term;
        struct list_head *list = NULL;
        struct list_head *orig_head = NULL;
        struct perf_pmu *pmu = NULL;
+       YYLTYPE *loc = loc_;
        int ok = 0;
        char *config;
 
@@ -1727,9 +1738,10 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
                                parse_events_copy_term_list(head, &orig_head);
                                if (!parse_events_add_pmu(parse_state, list,
                                                          pmu->name, orig_head,
-                                                         auto_merge_stats)) {
+                                                         auto_merge_stats, loc)) {
                                        pr_debug("%s -> %s/%s/\n", str,
                                                 pmu->name, alias->str);
+                                       parse_state->wild_card_pmus = true;
                                        ok++;
                                }
                                parse_events_terms__delete(orig_head);
@@ -1739,7 +1751,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
 
        if (parse_state->fake_pmu) {
                if (!parse_events_add_pmu(parse_state, list, str, head,
-                                         /*auto_merge_stats=*/true)) {
+                                         /*auto_merge_stats=*/true, loc)) {
                        pr_debug("%s -> %s/%s/\n", str, "fake_pmu", str);
                        ok++;
                }
@@ -1972,8 +1984,11 @@ int parse_events_name(struct list_head *list, const char *name)
        struct evsel *evsel;
 
        __evlist__for_each_entry(list, evsel) {
-               if (!evsel->name)
+               if (!evsel->name) {
                        evsel->name = strdup(name);
+                       if (!evsel->name)
+                               return -ENOMEM;
+               }
        }
 
        return 0;
@@ -2715,9 +2730,6 @@ int parse_events_term__clone(struct parse_events_term **new,
 
 void parse_events_term__delete(struct parse_events_term *term)
 {
-       if (term->array.nr_ranges)
-               zfree(&term->array.ranges);
-
        if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM)
                zfree(&term->val.str);
 
@@ -2768,11 +2780,6 @@ void parse_events_terms__delete(struct list_head *terms)
        free(terms);
 }
 
-void parse_events__clear_array(struct parse_events_array *a)
-{
-       zfree(&a->ranges);
-}
-
 void parse_events_evlist_error(struct parse_events_state *parse_state,
                               int idx, const char *str)
 {
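
A minimal sketch of the error-reporting pattern these hunks thread through the parser, assuming a caller that already holds the bison location of the offending token; report_bad_event is a hypothetical helper for illustration, not part of the patch:

	static int report_bad_event(struct parse_events_error *err, void *loc_)
	{
		YYLTYPE *loc = loc_;	/* the location is passed as void* so YYLTYPE stays out of the header */

		/* Column from the grammar location, falling back to 0 when no location was threaded down. */
		parse_events_error__handle(err, loc ? loc->first_column : 0,
					   strdup("Bad event or PMU"),
					   strdup("(add -v to see detail)"));
		return -EINVAL;
	}
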
index b0eb95f..b77ff61 100644 (file)
@@ -81,17 +81,8 @@ enum {
        __PARSE_EVENTS__TERM_TYPE_NR,
 };
 
-struct parse_events_array {
-       size_t nr_ranges;
-       struct {
-               unsigned int start;
-               size_t length;
-       } *ranges;
-};
-
 struct parse_events_term {
        char *config;
-       struct parse_events_array array;
        union {
                char *str;
                u64  num;
@@ -121,17 +112,25 @@ struct parse_events_error {
 };
 
 struct parse_events_state {
+       /* The list parsed events are placed on. */
        struct list_head           list;
+       /* The updated index used by entries as they are added. */
        int                        idx;
+       /* Error information. */
        struct parse_events_error *error;
+       /* Used by BPF event creation. */
        struct evlist             *evlist;
+       /* Holds returned terms for term parsing. */
        struct list_head          *terms;
+       /* Start token. */
        int                        stoken;
+       /* Special fake PMU marker for testing. */
        struct perf_pmu           *fake_pmu;
        /* If non-null, when wildcard matching only match the given PMU. */
        const char                *pmu_filter;
        /* Should PE_LEGACY_NAME tokens be generated for config terms? */
        bool                       match_legacy_cache_terms;
+       /* Were multiple PMUs scanned to find events? */
        bool                       wild_card_pmus;
 };
 
@@ -154,25 +153,26 @@ int parse_events_term__clone(struct parse_events_term **new,
 void parse_events_term__delete(struct parse_events_term *term);
 void parse_events_terms__delete(struct list_head *terms);
 void parse_events_terms__purge(struct list_head *terms);
-void parse_events__clear_array(struct parse_events_array *a);
 int parse_events__modifier_event(struct list_head *list, char *str, bool add);
 int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, const char *name);
 int parse_events_add_tracepoint(struct list_head *list, int *idx,
                                const char *sys, const char *event,
                                struct parse_events_error *error,
-                               struct list_head *head_config);
+                               struct list_head *head_config, void *loc);
 int parse_events_load_bpf(struct parse_events_state *parse_state,
                          struct list_head *list,
                          char *bpf_file_name,
                          bool source,
-                         struct list_head *head_config);
+                         struct list_head *head_config,
+                         void *loc);
 /* Provide this function for perf test */
 struct bpf_object;
 int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
                              struct list_head *list,
                              struct bpf_object *obj,
-                             struct list_head *head_config);
+                             struct list_head *head_config,
+                             void *loc);
 int parse_events_add_numeric(struct parse_events_state *parse_state,
                             struct list_head *list,
                             u32 type, u64 config,
@@ -192,7 +192,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state,
 int parse_events_add_pmu(struct parse_events_state *parse_state,
                         struct list_head *list, char *name,
                         struct list_head *head_config,
-                        bool auto_merge_stats);
+                        bool auto_merge_stats, void *loc);
 
 struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
                                      const char *name, const char *metric_id,
@@ -201,7 +201,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
 int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
                               char *str,
                               struct list_head *head_config,
-                              struct list_head **listp);
+                              struct list_head **listp, void *loc);
 
 int parse_events_copy_term_list(struct list_head *old,
                                 struct list_head **new);
index 99335ec..d7d084c 100644 (file)
@@ -175,7 +175,6 @@ do {                                                        \
 %x mem
 %s config
 %x event
-%x array
 
 group          [^,{}/]*[{][^}]*[}][^,{}/]*
 event_pmu      [^,{}/]+[/][^/]*[/][^,{}/]*
@@ -251,14 +250,6 @@ non_digit  [^0-9]
                }
 }
 
-<array>{
-"]"                    { BEGIN(config); return ']'; }
-{num_dec}              { return value(yyscanner, 10); }
-{num_hex}              { return value(yyscanner, 16); }
-,                      { return ','; }
-"\.\.\."               { return PE_ARRAY_RANGE; }
-}
-
 <config>{
        /*
         * Please update config_term_names when new static term is added.
@@ -302,8 +293,6 @@ r0x{num_raw_hex}    { return str(yyscanner, PE_RAW); }
 {lc_type}-{lc_op_result}       { return lc_str(yyscanner, _parse_state); }
 {lc_type}-{lc_op_result}-{lc_op_result}        { return lc_str(yyscanner, _parse_state); }
 {name_minus}           { return str(yyscanner, PE_NAME); }
-\[all\]                        { return PE_ARRAY_ALL; }
-"["                    { BEGIN(array); return '['; }
 @{drv_cfg_term}                { return drv_str(yyscanner, PE_DRV_CFG_TERM); }
 }
 
index 9f28d4b..c3517e3 100644 (file)
 #include "parse-events.h"
 #include "parse-events-bison.h"
 
+int parse_events_lex(YYSTYPE * yylval_param, YYLTYPE * yylloc_param , void *yyscanner);
 void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg);
 
-#define ABORT_ON(val) \
+#define PE_ABORT(val) \
 do { \
-       if (val) \
-               YYABORT; \
+       if (val == -ENOMEM) \
+               YYNOMEM; \
+       YYABORT; \
 } while (0)
 
 static struct list_head* alloc_list(void)
@@ -61,10 +63,8 @@ static void free_list_evsel(struct list_head* list_evsel)
 %token PE_BPF_OBJECT PE_BPF_SOURCE
 %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH
 %token PE_LEGACY_CACHE
-%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
+%token PE_PREFIX_MEM
 %token PE_ERROR
-%token PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
-%token PE_ARRAY_ALL PE_ARRAY_RANGE
 %token PE_DRV_CFG_TERM
 %token PE_TERM_HW
 %type <num> PE_VALUE
@@ -81,7 +81,6 @@ static void free_list_evsel(struct list_head* list_evsel)
 %type <str> PE_MODIFIER_EVENT
 %type <str> PE_MODIFIER_BP
 %type <str> PE_EVENT_NAME
-%type <str> PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
 %type <str> PE_DRV_CFG_TERM
 %type <str> name_or_raw name_or_legacy
 %destructor { free ($$); } <str>
@@ -109,11 +108,6 @@ static void free_list_evsel(struct list_head* list_evsel)
 %type <list_evsel> groups
 %destructor { free_list_evsel ($$); } <list_evsel>
 %type <tracepoint_name> tracepoint_name
-%destructor { free ($$.sys); free ($$.event); } <tracepoint_name>
-%type <array> array
-%type <array> array_term
-%type <array> array_terms
-%destructor { free ($$.ranges); } <array>
 %type <hardware_term> PE_TERM_HW
 %destructor { free ($$.str); } <hardware_term>
 
@@ -128,7 +122,6 @@ static void free_list_evsel(struct list_head* list_evsel)
                char *sys;
                char *event;
        } tracepoint_name;
-       struct parse_events_array array;
        struct hardware_term {
                char *str;
                u64 num;
@@ -265,7 +258,7 @@ PE_EVENT_NAME event_def
        free($1);
        if (err) {
                free_list_evsel($2);
-               YYABORT;
+               YYNOMEM;
        }
        $$ = $2;
 }
@@ -285,37 +278,38 @@ event_pmu:
 PE_NAME opt_pmu_config
 {
        struct parse_events_state *parse_state = _parse_state;
-       struct parse_events_error *error = parse_state->error;
        struct list_head *list = NULL, *orig_terms = NULL, *terms= NULL;
        char *pattern = NULL;
 
-#define CLEANUP_YYABORT                                        \
+#define CLEANUP                                                \
        do {                                            \
                parse_events_terms__delete($2);         \
                parse_events_terms__delete(orig_terms); \
                free(list);                             \
                free($1);                               \
                free(pattern);                          \
-               YYABORT;                                \
        } while(0)
 
-       if (parse_events_copy_term_list($2, &orig_terms))
-               CLEANUP_YYABORT;
-
-       if (error)
-               error->idx = @1.first_column;
+       if (parse_events_copy_term_list($2, &orig_terms)) {
+               CLEANUP;
+               YYNOMEM;
+       }
 
        list = alloc_list();
-       if (!list)
-               CLEANUP_YYABORT;
+       if (!list) {
+               CLEANUP;
+               YYNOMEM;
+       }
        /* Attempt to add to list assuming $1 is a PMU name. */
-       if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false)) {
+       if (parse_events_add_pmu(parse_state, list, $1, $2, /*auto_merge_stats=*/false, &@1)) {
                struct perf_pmu *pmu = NULL;
                int ok = 0;
 
                /* Failure to add, try wildcard expansion of $1 as a PMU name. */
-               if (asprintf(&pattern, "%s*", $1) < 0)
-                       CLEANUP_YYABORT;
+               if (asprintf(&pattern, "%s*", $1) < 0) {
+                       CLEANUP;
+                       YYNOMEM;
+               }
 
                while ((pmu = perf_pmus__scan(pmu)) != NULL) {
                        char *name = pmu->name;
@@ -330,10 +324,12 @@ PE_NAME opt_pmu_config
                            !perf_pmu__match(pattern, pmu->alias_name, $1)) {
                                bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu);
 
-                               if (parse_events_copy_term_list(orig_terms, &terms))
-                                       CLEANUP_YYABORT;
+                               if (parse_events_copy_term_list(orig_terms, &terms)) {
+                                       CLEANUP;
+                                       YYNOMEM;
+                               }
                                if (!parse_events_add_pmu(parse_state, list, pmu->name, terms,
-                                                         auto_merge_stats)) {
+                                                         auto_merge_stats, &@1)) {
                                        ok++;
                                        parse_state->wild_card_pmus = true;
                                }
@@ -344,30 +340,26 @@ PE_NAME opt_pmu_config
                if (!ok) {
                        /* Failure to add, assume $1 is an event name. */
                        zfree(&list);
-                       ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list);
+                       ok = !parse_events_multi_pmu_add(parse_state, $1, $2, &list, &@1);
                        $2 = NULL;
                }
-               if (!ok)
-                       CLEANUP_YYABORT;
+               if (!ok) {
+                       struct parse_events_error *error = parse_state->error;
+                       char *help;
+
+                       if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", $1) < 0)
+                               help = NULL;
+                       parse_events_error__handle(error, @1.first_column,
+                                                  strdup("Bad event or PMU"),
+                                                  help);
+                       CLEANUP;
+                       YYABORT;
+               }
        }
-       parse_events_terms__delete($2);
-       parse_events_terms__delete(orig_terms);
-       free(pattern);
-       free($1);
-       $$ = list;
-#undef CLEANUP_YYABORT
-}
-|
-PE_KERNEL_PMU_EVENT sep_dc
-{
-       struct list_head *list;
-       int err;
-
-       err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list);
-       free($1);
-       if (err < 0)
-               YYABORT;
        $$ = list;
+       list = NULL;
+       CLEANUP;
+#undef CLEANUP
 }
 |
 PE_NAME sep_dc
@@ -375,61 +367,19 @@ PE_NAME sep_dc
        struct list_head *list;
        int err;
 
-       err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list);
-       free($1);
-       if (err < 0)
-               YYABORT;
-       $$ = list;
-}
-|
-PE_KERNEL_PMU_EVENT opt_pmu_config
-{
-       struct list_head *list;
-       int err;
-
-       /* frees $2 */
-       err = parse_events_multi_pmu_add(_parse_state, $1, $2, &list);
-       free($1);
-       if (err < 0)
-               YYABORT;
-       $$ = list;
-}
-|
-PE_PMU_EVENT_FAKE sep_dc
-{
-       struct list_head *list;
-       int err;
-
-       list = alloc_list();
-       if (!list)
-               YYABORT;
-
-       err = parse_events_add_pmu(_parse_state, list, $1, /*head_config=*/NULL,
-                                  /*auto_merge_stats=*/false);
-       free($1);
+       err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1);
        if (err < 0) {
-               free(list);
-               YYABORT;
-       }
-       $$ = list;
-}
-|
-PE_PMU_EVENT_FAKE opt_pmu_config
-{
-       struct list_head *list;
-       int err;
-
-       list = alloc_list();
-       if (!list)
-               YYABORT;
+               struct parse_events_state *parse_state = _parse_state;
+               struct parse_events_error *error = parse_state->error;
+               char *help;
 
-       err = parse_events_add_pmu(_parse_state, list, $1, $2, /*auto_merge_stats=*/false);
-       free($1);
-       parse_events_terms__delete($2);
-       if (err < 0) {
-               free(list);
-               YYABORT;
+               if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", $1) < 0)
+                       help = NULL;
+               parse_events_error__handle(error, @1.first_column, strdup("Bad event name"), help);
+               free($1);
+               PE_ABORT(err);
        }
+       free($1);
        $$ = list;
 }
 
@@ -448,12 +398,13 @@ value_sym '/' event_config '/'
        bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        err = parse_events_add_numeric(_parse_state, list, type, config, $3, wildcard);
        parse_events_terms__delete($3);
        if (err) {
                free_list_evsel(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -464,21 +415,28 @@ value_sym sep_slash_slash_dc
        int type = $1 >> 16;
        int config = $1 & 255;
        bool wildcard = (type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE);
+       int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
-       ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config,
-                                         /*head_config=*/NULL, wildcard));
+       if (!list)
+               YYNOMEM;
+       err = parse_events_add_numeric(_parse_state, list, type, config, /*head_config=*/NULL, wildcard);
+       if (err)
+               PE_ABORT(err);
        $$ = list;
 }
 |
 PE_VALUE_SYM_TOOL sep_slash_slash_dc
 {
        struct list_head *list;
+       int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
-       ABORT_ON(parse_events_add_tool(_parse_state, list, $1));
+       if (!list)
+               YYNOMEM;
+       err = parse_events_add_tool(_parse_state, list, $1);
+       if (err)
+               YYNOMEM;
        $$ = list;
 }
 
@@ -490,14 +448,16 @@ PE_LEGACY_CACHE opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
+
        err = parse_events_add_cache(list, &parse_state->idx, $1, parse_state, $2);
 
        parse_events_terms__delete($2);
        free($1);
        if (err) {
                free_list_evsel(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -509,14 +469,16 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
+
        err = parse_events_add_breakpoint(_parse_state, list,
                                          $2, $6, $4, $7);
        parse_events_terms__delete($7);
        free($6);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -527,13 +489,15 @@ PE_PREFIX_MEM PE_VALUE PE_BP_SLASH PE_VALUE opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
+
        err = parse_events_add_breakpoint(_parse_state, list,
                                          $2, NULL, $4, $5);
        parse_events_terms__delete($5);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -544,14 +508,16 @@ PE_PREFIX_MEM PE_VALUE PE_BP_COLON PE_MODIFIER_BP opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
+
        err = parse_events_add_breakpoint(_parse_state, list,
                                          $2, $4, 0, $5);
        parse_events_terms__delete($5);
        free($4);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -562,13 +528,14 @@ PE_PREFIX_MEM PE_VALUE opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        err = parse_events_add_breakpoint(_parse_state, list,
                                          $2, NULL, 0, $3);
        parse_events_terms__delete($3);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -582,19 +549,20 @@ tracepoint_name opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        if (error)
                error->idx = @1.first_column;
 
        err = parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event,
-                                       error, $2);
+                                       error, $2, &@1);
 
        parse_events_terms__delete($2);
        free($1.sys);
        free($1.event);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -614,13 +582,14 @@ PE_VALUE ':' PE_VALUE opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        err = parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4,
                                       /*wildcard=*/false);
        parse_events_terms__delete($4);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -633,17 +602,20 @@ PE_RAW opt_event_config
        u64 num;
 
        list = alloc_list();
-       ABORT_ON(!list);
+       if (!list)
+               YYNOMEM;
        errno = 0;
        num = strtoull($1 + 1, NULL, 16);
-       ABORT_ON(errno);
+       /* Given the lexer will only give [a-fA-F0-9]+ a failure here should be impossible. */
+       if (errno)
+               YYABORT;
        free($1);
        err = parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, num, $2,
                                       /*wildcard=*/false);
        parse_events_terms__delete($2);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -656,13 +628,14 @@ PE_BPF_OBJECT opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
-       err = parse_events_load_bpf(parse_state, list, $1, false, $2);
+       if (!list)
+               YYNOMEM;
+       err = parse_events_load_bpf(parse_state, list, $1, false, $2, &@1);
        parse_events_terms__delete($2);
        free($1);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -673,12 +646,13 @@ PE_BPF_SOURCE opt_event_config
        int err;
 
        list = alloc_list();
-       ABORT_ON(!list);
-       err = parse_events_load_bpf(_parse_state, list, $1, true, $2);
+       if (!list)
+               YYNOMEM;
+       err = parse_events_load_bpf(_parse_state, list, $1, true, $2, &@1);
        parse_events_terms__delete($2);
        if (err) {
                free(list);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = list;
 }
@@ -738,7 +712,8 @@ event_term
        struct list_head *head = malloc(sizeof(*head));
        struct parse_events_term *term = $1;
 
-       ABORT_ON(!head);
+       if (!head)
+               YYNOMEM;
        INIT_LIST_HEAD(head);
        list_add_tail(&term->list, head);
        $$ = head;
@@ -752,11 +727,12 @@ event_term:
 PE_RAW
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW,
+                                        strdup("raw"), $1, &@1, &@1);
 
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_RAW,
-                                       strdup("raw"), $1, &@1, &@1)) {
+       if (err) {
                free($1);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -764,12 +740,12 @@ PE_RAW
 name_or_raw '=' name_or_legacy
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, $1, $3, &@1, &@3);
 
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $3, &@1, &@3)) {
+       if (err) {
                free($1);
                free($3);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -777,11 +753,12 @@ name_or_raw '=' name_or_legacy
 name_or_raw '=' PE_VALUE
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                        $1, $3, false, &@1, &@3);
 
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $3, false, &@1, &@3)) {
+       if (err) {
                free($1);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -789,12 +766,13 @@ name_or_raw '=' PE_VALUE
 name_or_raw '=' PE_TERM_HW
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                        $1, $3.str, &@1, &@3);
 
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $3.str, &@1, &@3)) {
+       if (err) {
                free($1);
                free($3.str);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -802,11 +780,12 @@ name_or_raw '=' PE_TERM_HW
 PE_LEGACY_CACHE
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
+                                        $1, 1, true, &@1, NULL);
 
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE,
-                                       $1, 1, true, &@1, NULL)) {
+       if (err) {
                free($1);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -814,11 +793,12 @@ PE_LEGACY_CACHE
 PE_NAME
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+                                        $1, 1, true, &@1, NULL);
 
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, 1, true, &@1, NULL)) {
+       if (err) {
                free($1);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -826,11 +806,12 @@ PE_NAME
 PE_TERM_HW
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE,
+                                        $1.str, $1.num & 255, false, &@1, NULL);
 
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_HARDWARE,
-                                  $1.str, $1.num & 255, false, &@1, NULL)) {
+       if (err) {
                free($1.str);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -838,10 +819,11 @@ PE_TERM_HW
 PE_TERM '=' name_or_legacy
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3);
 
-       if (parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3)) {
+       if (err) {
                free($3);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -849,10 +831,11 @@ PE_TERM '=' name_or_legacy
 PE_TERM '=' PE_TERM_HW
 {
        struct parse_events_term *term;
+       int err = parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3);
 
-       if (parse_events_term__str(&term, (int)$1, NULL, $3.str, &@1, &@3)) {
+       if (err) {
                free($3.str);
-               YYABORT;
+               PE_ABORT(err);
        }
        $$ = term;
 }
@@ -860,127 +843,36 @@ PE_TERM '=' PE_TERM_HW
 PE_TERM '=' PE_TERM
 {
        struct parse_events_term *term;
+       int err = parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3);
 
-       ABORT_ON(parse_events_term__term(&term, (int)$1, (int)$3, &@1, &@3));
-       $$ = term;
-}
-|
-PE_TERM '=' PE_VALUE
-{
-       struct parse_events_term *term;
+       if (err)
+               PE_ABORT(err);
 
-       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3));
        $$ = term;
 }
 |
-PE_TERM
+PE_TERM '=' PE_VALUE
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3);
 
-       ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL));
-       $$ = term;
-}
-|
-name_or_raw array '=' name_or_legacy
-{
-       struct parse_events_term *term;
+       if (err)
+               PE_ABORT(err);
 
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $4, &@1, &@4)) {
-               free($1);
-               free($4);
-               free($2.ranges);
-               YYABORT;
-       }
-       term->array = $2;
        $$ = term;
 }
 |
-name_or_raw array '=' PE_VALUE
+PE_TERM
 {
        struct parse_events_term *term;
+       int err = parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL);
 
-       if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-                                       $1, $4, false, &@1, &@4)) {
-               free($1);
-               free($2.ranges);
-               YYABORT;
-       }
-       term->array = $2;
-       $$ = term;
-}
-|
-PE_DRV_CFG_TERM
-{
-       struct parse_events_term *term;
-       char *config = strdup($1);
+       if (err)
+               PE_ABORT(err);
 
-       ABORT_ON(!config);
-       if (parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
-                                       config, $1, &@1, NULL)) {
-               free($1);
-               free(config);
-               YYABORT;
-       }
        $$ = term;
 }
 
-array:
-'[' array_terms ']'
-{
-       $$ = $2;
-}
-|
-PE_ARRAY_ALL
-{
-       $$.nr_ranges = 0;
-       $$.ranges = NULL;
-}
-
-array_terms:
-array_terms ',' array_term
-{
-       struct parse_events_array new_array;
-
-       new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
-       new_array.ranges = realloc($1.ranges,
-                               sizeof(new_array.ranges[0]) *
-                               new_array.nr_ranges);
-       ABORT_ON(!new_array.ranges);
-       memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
-              $3.nr_ranges * sizeof(new_array.ranges[0]));
-       free($3.ranges);
-       $$ = new_array;
-}
-|
-array_term
-
-array_term:
-PE_VALUE
-{
-       struct parse_events_array array;
-
-       array.nr_ranges = 1;
-       array.ranges = malloc(sizeof(array.ranges[0]));
-       ABORT_ON(!array.ranges);
-       array.ranges[0].start = $1;
-       array.ranges[0].length = 1;
-       $$ = array;
-}
-|
-PE_VALUE PE_ARRAY_RANGE PE_VALUE
-{
-       struct parse_events_array array;
-
-       ABORT_ON($3 < $1);
-       array.nr_ranges = 1;
-       array.ranges = malloc(sizeof(array.ranges[0]));
-       ABORT_ON(!array.ranges);
-       array.ranges[0].start = $1;
-       array.ranges[0].length = $3 - $1 + 1;
-       $$ = array;
-}
-
 sep_dc: ':' |
 
 sep_slash_slash_dc: '/' '/' | ':' |
index 28380e7..d5406ef 100644 (file)
@@ -19,8 +19,8 @@
 #include "evsel.h"
 #include "pmu.h"
 #include "pmus.h"
-#include "pmu-bison.h"
-#include "pmu-flex.h"
+#include <util/pmu-bison.h>
+#include <util/pmu-flex.h>
 #include "parse-events.h"
 #include "print-events.h"
 #include "header.h"
index dff4e89..3d46cca 100644 (file)
@@ -11,6 +11,9 @@
 #include <linux/bitmap.h>
 #include <string.h>
 #include "pmu.h"
+#include "pmu-bison.h"
+
+int perf_pmu_lex(YYSTYPE * yylval_param , void *yyscanner);
 
 #define ABORT_ON(val) \
 do { \
index 16822a8..2d056f0 100644 (file)
@@ -2800,13 +2800,18 @@ static void warn_uprobe_event_compat(struct probe_trace_event *tev)
        if (!tev->uprobes || tev->nargs == 0 || !buf)
                goto out;
 
-       for (i = 0; i < tev->nargs; i++)
-               if (strglobmatch(tev->args[i].value, "[$@+-]*")) {
-                       pr_warning("Please upgrade your kernel to at least "
-                                  "3.14 to have access to feature %s\n",
+       for (i = 0; i < tev->nargs; i++) {
+               if (strchr(tev->args[i].value, '@')) {
+                       pr_warning("%s accesses a variable by symbol name, but that is not supported for user application probes.\n",
+                                  tev->args[i].value);
+                       break;
+               }
+               if (strglobmatch(tev->args[i].value, "[$+-]*")) {
+                       pr_warning("Please upgrade your kernel to at least 3.14 to have access to feature %s\n",
                                   tev->args[i].value);
                        break;
                }
+       }
 out:
        free(buf);
 }
index c220fec..586b94e 100644 (file)
@@ -5,4 +5,5 @@ perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
 
 CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
 
-CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum
+# -Wno-declaration-after-statement: The python headers mix code with declarations (decls after asserts, for instance)
+CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-deprecated-declarations -Wno-switch-enum -Wno-declaration-after-statement
index 869738f..79d5e29 100644 (file)
@@ -66,6 +66,9 @@ if cc_is_clang:
 else:
     cflags += ['-Wno-cast-function-type' ]
 
+# The python headers mix code with declarations (decls after asserts, for instance)
+cflags += [ "-Wno-declaration-after-statement" ]
+
 src_perf  = getenv('srctree') + '/tools/perf'
 build_lib = getenv('PYTHON_EXTBUILD_LIB')
 build_tmp = getenv('PYTHON_EXTBUILD_TMP')
index 967e583..ec35060 100644 (file)
@@ -729,7 +729,7 @@ size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
 
 size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
 {
-       struct perf_stat_config sc;
+       struct perf_stat_config sc = {};
        size_t ret;
 
        perf_event__read_stat_config(&sc, &event->stat_config);
index 0b16640..fe5e699 100644 (file)
@@ -80,6 +80,15 @@ err_thread:
        return NULL;
 }
 
+static void (*thread__priv_destructor)(void *priv);
+
+void thread__set_priv_destructor(void (*destructor)(void *priv))
+{
+       assert(thread__priv_destructor == NULL);
+
+       thread__priv_destructor = destructor;
+}
+
 void thread__delete(struct thread *thread)
 {
        struct namespaces *namespaces, *tmp_namespaces;
@@ -112,6 +121,10 @@ void thread__delete(struct thread *thread)
        exit_rwsem(thread__namespaces_lock(thread));
        exit_rwsem(thread__comm_lock(thread));
        thread__free_stitch_list(thread);
+
+       if (thread__priv_destructor)
+               thread__priv_destructor(thread__priv(thread));
+
        RC_CHK_FREE(thread);
 }
 
index 9068a21..e79225a 100644 (file)
@@ -71,6 +71,8 @@ struct thread *thread__new(pid_t pid, pid_t tid);
 int thread__init_maps(struct thread *thread, struct machine *machine);
 void thread__delete(struct thread *thread);
 
+void thread__set_priv_destructor(void (*destructor)(void *priv));
+
 struct thread *thread__get(struct thread *thread);
 void thread__put(struct thread *thread);
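
A minimal usage sketch of the new destructor hook (assumed tool-side code, not part of this patch; it presumes the tool attached a malloc()ed blob with thread__set_priv()):

	static void tool_thread_priv_free(void *priv)
	{
		free(priv);	/* release whatever the tool stored via thread__set_priv() */
	}

	/* Register once at tool start-up; thread__delete() then invokes it for each thread. */
	thread__set_priv_destructor(tool_thread_priv_free);
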
 
index 172e472..d69d034 100644 (file)
@@ -177,3 +177,23 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
 endef
 _ge_attempt = $(or $(get-executable),$(call _gea_err,$(2)))
 _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
+
+# version-ge3
+#
+# Usage $(call version-ge3,2.6.4,$(FLEX_VERSION))
+#
+# To compare if a 3 component version is greater than or equal to another. The
+# first use was to check the flex version, to see if we can use compiler warnings
+# as errors for one of the cases where flex generates code that C compilers complain about.
+
+version-ge3 = $(shell echo "$(1).$(2)" | awk -F'.' '{ printf("%d\n", (10000000 * $$1 + 10000 * $$2 + $$3) >= (10000000 * $$4 + 10000 * $$5 + $$6)) }')
+
+# version-lt3
+#
+# Usage $(call version-lt3,2.6.2,$(FLEX_VERSION))
+#
+# To compare if a 3 component version is less than another. The first use was to
+# check the flex version, to see if we can use compiler warnings as errors for
+# one of the cases where flex generates code that C compilers complain about.
+
+version-lt3 = $(shell echo "$(1).$(2)" | awk -F'.' '{ printf("%d\n", (10000000 * $$1 + 10000 * $$2 + $$3) < (10000000 * $$4 + 10000 * $$5 + $$6)) }')