perf trace: Allow specifying the bpf prog to augment specific syscalls
author Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 16 Jul 2019 13:59:19 +0000 (10:59 -0300)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Mon, 29 Jul 2019 21:34:41 +0000 (18:34 -0300)
This is a step in the direction of being able to use a
BPF_MAP_TYPE_PROG_ARRAY to handle syscalls that need to copy pointer
payloads in addition to the raw tracepoint syscall args.

There is a first example in
tools/perf/examples/bpf/augmented_raw_syscalls.c for the 'open' syscall.

Next step is to introduce the prog array map and use this 'open'
augmenter, then use that augmenter in other syscalls that also only copy
the first arg as a string, and then show how to use with a syscall that
reads more than one filename, like 'rename', etc.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Luis Cláudio Gonçalves <lclaudio@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/n/tip-pys4v57x5qqrybb4cery2mc8@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-trace.c
tools/perf/examples/bpf/augmented_raw_syscalls.c

index 07df952..6cc696e 100644 (file)
@@ -690,6 +690,10 @@ struct syscall_arg_fmt {
 static struct syscall_fmt {
        const char *name;
        const char *alias;
+       struct {
+               const char *sys_enter,
+                          *sys_exit;
+       }          bpf_prog_name;
        struct syscall_arg_fmt arg[6];
        u8         nr_args;
        bool       errpid;
@@ -823,6 +827,7 @@ static struct syscall_fmt {
        { .name     = "newfstatat",
          .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
        { .name     = "open",
+         .bpf_prog_name = { .sys_enter = "!syscalls:sys_enter_open", },
          .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
        { .name     = "open_by_handle_at",
          .arg = { [0] = { .scnprintf = SCA_FDAT,       /* dfd */ },
@@ -967,6 +972,10 @@ struct syscall {
        struct tep_event    *tp_format;
        int                 nr_args;
        int                 args_size;
+       struct {
+               struct bpf_program *sys_enter,
+                                  *sys_exit;
+       }                   bpf_prog;
        bool                is_exit;
        bool                is_open;
        struct tep_format_field *args;
@@ -2742,6 +2751,39 @@ static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace,
        return bpf_object__find_program_by_title(trace->bpf_obj, name);
 }
 
+static struct bpf_program *trace__find_syscall_bpf_prog(struct trace *trace, struct syscall *sc,
+                                                       const char *prog_name, const char *type)
+{
+       struct bpf_program *prog;
+       /* Returns the BPF prog titled prog_name, or trace->syscalls.unaugmented_prog if it is NULL or not found. */
+       if (prog_name == NULL)
+               goto out_unaugmented;
+
+       prog = trace__find_bpf_program_by_title(trace, prog_name);
+       if (prog != NULL)
+               return prog;
+       /* Lookup failed: sc and type are only used to describe the syscall in this debug message. */
+       pr_debug("Couldn't find BPF prog \"%s\" to associate with syscalls:sys_%s_%s, not augmenting it\n",
+                prog_name, type, sc->name);
+out_unaugmented:
+       return trace->syscalls.unaugmented_prog;
+}
+
+static void trace__init_syscall_bpf_progs(struct trace *trace, int id)
+{
+       struct syscall *sc = trace__syscall_info(trace, NULL, id);
+       /* Sets sc->bpf_prog.sys_enter/sys_exit for syscall 'id'; does nothing if trace__syscall_info() returns NULL. */
+       if (sc == NULL)
+               return;
+       /* Without a syscall_fmt entry there are no prog names to look up, so both slots get the unaugmented prog. */
+       if (sc->fmt != NULL) {
+               sc->bpf_prog.sys_enter = trace__find_syscall_bpf_prog(trace, sc, sc->fmt->bpf_prog_name.sys_enter, "enter");
+               sc->bpf_prog.sys_exit  = trace__find_syscall_bpf_prog(trace, sc, sc->fmt->bpf_prog_name.sys_exit,  "exit");
+       } else {
+               sc->bpf_prog.sys_enter = sc->bpf_prog.sys_exit = trace->syscalls.unaugmented_prog;
+       }
+}
+
 static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
 {
        struct syscall *sc = trace__syscall_info(trace, NULL, id);
@@ -2773,8 +2815,10 @@ static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
        for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
                int key = trace->ev_qualifier_ids.entries[i];
 
-               if (value.enabled)
+               if (value.enabled) {
                        trace__init_bpf_map_syscall_args(trace, key, &value);
+                       trace__init_syscall_bpf_progs(trace, key);
+               }
 
                err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
                if (err)
@@ -2793,8 +2837,10 @@ static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
        int err = 0, key;
 
        for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
-               if (enabled)
+               if (enabled) {
                        trace__init_bpf_map_syscall_args(trace, key, &value);
+                       trace__init_syscall_bpf_progs(trace, key);
+               }
 
                err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
                if (err)
index 48a536b..66b33b2 100644 (file)
@@ -94,6 +94,29 @@ int syscall_unaugmented(struct syscall_enter_args *args)
        return 1;
 }
 
+/*
+ * Augmenter for the 'open' syscall. It will be tail_called from
+ * SEC("raw_syscalls:sys_enter"), so it finds in augmented_filename_map what
+ * that raw_syscalls:sys_enter already read and goes on from there, reading the
+ * first syscall arg as a string, i.e. open's filename, before emitting the event.
+ */
+SEC("!syscalls:sys_enter_open")
+int sys_enter_open(struct syscall_enter_args *args)
+{
+       int key = 0;
+       struct augmented_args_filename *augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key);
+       const void *filename_arg = (const void *)args->args[0];
+       unsigned int len = sizeof(augmented_args->args);
+
+        if (augmented_args == NULL)
+                return 1; /* Failure: don't filter */
+       /* len starts at the size of the raw args and grows by what augmented_filename__read() returns (presumably bytes copied; confirm) */
+       len += augmented_filename__read(&augmented_args->filename, filename_arg, sizeof(augmented_args->filename.value));
+
+       /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
+       return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len);
+}
+
 SEC("raw_syscalls:sys_enter")
 int sys_enter(struct syscall_enter_args *args)
 {