bpf: Optimize program stats
author Alexei Starovoitov <ast@kernel.org>
Wed, 10 Feb 2021 03:36:26 +0000 (19:36 -0800)
committer Daniel Borkmann <daniel@iogearbox.net>
Thu, 11 Feb 2021 15:17:50 +0000 (16:17 +0100)
Move bpf_prog_stats from prog->aux into prog to avoid one extra load
in the critical path of program execution.
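
In the BPF_PROG_RUN fast path this trades a dependent two-load chain
(prog->aux, then aux->stats) for a single load of prog->stats before the
per-cpu pointer is computed; the include/linux/filter.h hunk below boils
down to:

    /* before: load prog->aux, then aux->stats, to reach the per-cpu stats */
    __stats = this_cpu_ptr(prog->aux->stats);
    /* after: single load of prog->stats */
    __stats = this_cpu_ptr(prog->stats);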

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210210033634.62081-2-alexei.starovoitov@gmail.com
include/linux/bpf.h
include/linux/filter.h
kernel/bpf/core.c
kernel/bpf/syscall.c
kernel/bpf/trampoline.c
kernel/bpf/verifier.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 079162bbd387f44a8eebbdb97b6ae2df731955b3..5a388955e6b6872143e608248a9ac56160a2af55 100644
@@ -14,7 +14,6 @@
 #include <linux/numa.h>
 #include <linux/mm_types.h>
 #include <linux/wait.h>
-#include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
@@ -507,12 +506,6 @@ enum bpf_cgroup_storage_type {
  */
 #define MAX_BPF_FUNC_ARGS 12
 
-struct bpf_prog_stats {
-       u64 cnt;
-       u64 nsecs;
-       struct u64_stats_sync syncp;
-} __aligned(2 * sizeof(u64));
-
 struct btf_func_model {
        u8 ret_size;
        u8 nr_args;
@@ -845,7 +838,6 @@ struct bpf_prog_aux {
        u32 linfo_idx;
        u32 num_exentries;
        struct exception_table_entry *extable;
-       struct bpf_prog_stats __percpu *stats;
        union {
                struct work_struct work;
                struct rcu_head rcu;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 5b3137d7b690e21e8bddfb336935339794769d70..cecb03c9d2518fe8cfd280eba90350eecad05a7e 100644
@@ -22,6 +22,7 @@
 #include <linux/vmalloc.h>
 #include <linux/sockptr.h>
 #include <crypto/sha1.h>
+#include <linux/u64_stats_sync.h>
 
 #include <net/sch_generic.h>
 
@@ -539,6 +540,12 @@ struct bpf_binary_header {
        u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
 };
 
+struct bpf_prog_stats {
+       u64 cnt;
+       u64 nsecs;
+       struct u64_stats_sync syncp;
+} __aligned(2 * sizeof(u64));
+
 struct bpf_prog {
        u16                     pages;          /* Number of allocated pages */
        u16                     jited:1,        /* Is our filter JIT'ed? */
@@ -557,10 +564,11 @@ struct bpf_prog {
        u32                     len;            /* Number of filter blocks */
        u32                     jited_len;      /* Size of jited insns in bytes */
        u8                      tag[BPF_TAG_SIZE];
-       struct bpf_prog_aux     *aux;           /* Auxiliary fields */
-       struct sock_fprog_kern  *orig_prog;     /* Original BPF program */
+       struct bpf_prog_stats __percpu *stats;
        unsigned int            (*bpf_func)(const void *ctx,
                                            const struct bpf_insn *insn);
+       struct bpf_prog_aux     *aux;           /* Auxiliary fields */
+       struct sock_fprog_kern  *orig_prog;     /* Original BPF program */
        /* Instructions for interpreter */
        struct sock_filter      insns[0];
        struct bpf_insn         insnsi[];
@@ -581,7 +589,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
                struct bpf_prog_stats *__stats;                         \
                u64 __start = sched_clock();                            \
                __ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);   \
-               __stats = this_cpu_ptr(prog->aux->stats);               \
+               __stats = this_cpu_ptr(prog->stats);                    \
                u64_stats_update_begin(&__stats->syncp);                \
                __stats->cnt++;                                         \
                __stats->nsecs += sched_clock() - __start;              \
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 5bbd4884ff7ad97b426abbaa51f43977c90e7dd3..2cf71fd39c227a0e37b173991ad38db172505158 100644
@@ -114,8 +114,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
        if (!prog)
                return NULL;
 
-       prog->aux->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
-       if (!prog->aux->stats) {
+       prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
+       if (!prog->stats) {
                kfree(prog->aux);
                vfree(prog);
                return NULL;
@@ -124,7 +124,7 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
        for_each_possible_cpu(cpu) {
                struct bpf_prog_stats *pstats;
 
-               pstats = per_cpu_ptr(prog->aux->stats, cpu);
+               pstats = per_cpu_ptr(prog->stats, cpu);
                u64_stats_init(&pstats->syncp);
        }
        return prog;
@@ -249,10 +249,10 @@ void __bpf_prog_free(struct bpf_prog *fp)
        if (fp->aux) {
                mutex_destroy(&fp->aux->used_maps_mutex);
                mutex_destroy(&fp->aux->dst_mutex);
-               free_percpu(fp->aux->stats);
                kfree(fp->aux->poke_tab);
                kfree(fp->aux);
        }
+       free_percpu(fp->stats);
        vfree(fp);
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e5999d86c76ea17ca49a4cf8851cf51cc20d9a3d..f7df56a704de8d259ee4a45e34c0d97789d9822a 100644
@@ -1739,7 +1739,7 @@ static void bpf_prog_get_stats(const struct bpf_prog *prog,
                unsigned int start;
                u64 tnsecs, tcnt;
 
-               st = per_cpu_ptr(prog->aux->stats, cpu);
+               st = per_cpu_ptr(prog->stats, cpu);
                do {
                        start = u64_stats_fetch_begin_irq(&st->syncp);
                        tnsecs = st->nsecs;
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 35c5887d82ffec4f723cadbcff1d23b413cba011..5be3beeedd74f36c6c897080e960e7ce4edab3cf 100644
@@ -412,7 +412,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
             * Hence check that 'start' is not zero.
             */
            start) {
-               stats = this_cpu_ptr(prog->aux->stats);
+               stats = this_cpu_ptr(prog->stats);
                u64_stats_update_begin(&stats->syncp);
                stats->cnt++;
                stats->nsecs += sched_clock() - start;
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 400d79e99fc81cc5224c1469d744e4bbefb8a5ad..424c1ba0f52fc7350f2a648635ab3a0904798bea 100644
@@ -11253,7 +11253,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
                /* BPF_PROG_RUN doesn't call subprogs directly,
                 * hence main prog stats include the runtime of subprogs.
                 * subprogs don't have IDs and not reachable via prog_get_next_id
-                * func[i]->aux->stats will never be accessed and stays NULL
+                * func[i]->stats will never be accessed and stays NULL
                 */
                func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
                if (!func[i])