bpf: Move rcu lock management out of BPF_PROG_RUN routines

author Stanislav Fomichev <sdf@google.com>

Thu, 14 Apr 2022 16:12:33 +0000 (09:12 -0700)

committer Alexei Starovoitov <ast@kernel.org>

Tue, 19 Apr 2022 16:45:47 +0000 (09:45 -0700)
author Stanislav Fomichev <sdf@google.com>
Thu, 14 Apr 2022 16:12:33 +0000 (09:12 -0700)
committer Alexei Starovoitov <ast@kernel.org>
Tue, 19 Apr 2022 16:45:47 +0000 (09:45 -0700)
diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c

index 3eff08d..fe17c7f 100644 (file)
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -216,8 +216,12 @@ void lirc_bpf_run(struct rc_dev *rcdev, u32 sample)
  
         raw->bpf_sample = sample;
  
-       if (raw->progs)
-               BPF_PROG_RUN_ARRAY(raw->progs, &raw->bpf_sample, bpf_prog_run);
+       if (raw->progs) {
+               rcu_read_lock();
+               bpf_prog_run_array(rcu_dereference(raw->progs),
+                                  &raw->bpf_sample, bpf_prog_run);
+               rcu_read_unlock();
+       }
  }
  
  /*
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

index bdb5298..7bf4415 100644 (file)
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1221,7 +1221,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
  /* an array of programs to be executed under rcu_lock.
   *
   * Typical usage:
- * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, bpf_prog_run);
+ * ret = bpf_prog_run_array(rcu_dereference(&bpf_prog_array), ctx, bpf_prog_run);
   *
   * the structure returned by bpf_prog_array_alloc() should be populated
   * with program pointers and the last pointer must be NULL.
@@ -1315,83 +1315,22 @@ static inline void bpf_reset_run_ctx(struct bpf_run_ctx *old_ctx)
  
  typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
  
-static __always_inline int
-BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
-                           const void *ctx, bpf_prog_run_fn run_prog,
-                           int retval, u32 *ret_flags)
-{
-       const struct bpf_prog_array_item *item;
-       const struct bpf_prog *prog;
-       const struct bpf_prog_array *array;
-       struct bpf_run_ctx *old_run_ctx;
-       struct bpf_cg_run_ctx run_ctx;
-       u32 func_ret;
-
-       run_ctx.retval = retval;
-       migrate_disable();
-       rcu_read_lock();
-       array = rcu_dereference(array_rcu);
-       item = &array->items[0];
-       old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
-       while ((prog = READ_ONCE(item->prog))) {
-               run_ctx.prog_item = item;
-               func_ret = run_prog(prog, ctx);
-               if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
-                       run_ctx.retval = -EPERM;
-               *(ret_flags) |= (func_ret >> 1);
-               item++;
-       }
-       bpf_reset_run_ctx(old_run_ctx);
-       rcu_read_unlock();
-       migrate_enable();
-       return run_ctx.retval;
-}
-
-static __always_inline int
-BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
-                     const void *ctx, bpf_prog_run_fn run_prog,
-                     int retval)
-{
-       const struct bpf_prog_array_item *item;
-       const struct bpf_prog *prog;
-       const struct bpf_prog_array *array;
-       struct bpf_run_ctx *old_run_ctx;
-       struct bpf_cg_run_ctx run_ctx;
-
-       run_ctx.retval = retval;
-       migrate_disable();
-       rcu_read_lock();
-       array = rcu_dereference(array_rcu);
-       item = &array->items[0];
-       old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
-       while ((prog = READ_ONCE(item->prog))) {
-               run_ctx.prog_item = item;
-               if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
-                       run_ctx.retval = -EPERM;
-               item++;
-       }
-       bpf_reset_run_ctx(old_run_ctx);
-       rcu_read_unlock();
-       migrate_enable();
-       return run_ctx.retval;
-}
-
  static __always_inline u32
-BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
+bpf_prog_run_array(const struct bpf_prog_array *array,
                    const void *ctx, bpf_prog_run_fn run_prog)
  {
         const struct bpf_prog_array_item *item;
         const struct bpf_prog *prog;
-       const struct bpf_prog_array *array;
         struct bpf_run_ctx *old_run_ctx;
         struct bpf_trace_run_ctx run_ctx;
         u32 ret = 1;
  
-       migrate_disable();
-       rcu_read_lock();
-       array = rcu_dereference(array_rcu);
+       RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "no rcu lock held");
+
         if (unlikely(!array))
-               goto out;
+               return ret;
+
+       migrate_disable();
         old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
         item = &array->items[0];
         while ((prog = READ_ONCE(item->prog))) {
@@ -1400,50 +1339,10 @@ BPF_PROG_RUN_ARRAY(const struct bpf_prog_array __rcu *array_rcu,
                 item++;
         }
         bpf_reset_run_ctx(old_run_ctx);
-out:
-       rcu_read_unlock();
         migrate_enable();
         return ret;
  }
  
-/* To be used by __cgroup_bpf_run_filter_skb for EGRESS BPF progs
- * so BPF programs can request cwr for TCP packets.
- *
- * Current cgroup skb programs can only return 0 or 1 (0 to drop the
- * packet. This macro changes the behavior so the low order bit
- * indicates whether the packet should be dropped (0) or not (1)
- * and the next bit is a congestion notification bit. This could be
- * used by TCP to call tcp_enter_cwr()
- *
- * Hence, new allowed return values of CGROUP EGRESS BPF programs are:
- *   0: drop packet
- *   1: keep packet
- *   2: drop packet and cn
- *   3: keep packet and cn
- *
- * This macro then converts it to one of the NET_XMIT or an error
- * code that is then interpreted as drop packet (and no cn):
- *   0: NET_XMIT_SUCCESS  skb should be transmitted
- *   1: NET_XMIT_DROP     skb should be dropped and cn
- *   2: NET_XMIT_CN       skb should be transmitted and cn
- *   3: -err              skb should be dropped
- */
-#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func)                \
-       ({                                              \
-               u32 _flags = 0;                         \
-               bool _cn;                               \
-               u32 _ret;                               \
-               _ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
-               _cn = _flags & BPF_RET_SET_CN;          \
-               if (_ret && !IS_ERR_VALUE((long)_ret))  \
-                       _ret = -EFAULT;                 \
-               if (!_ret)                              \
-                       _ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);  \
-               else                                    \
-                       _ret = (_cn ? NET_XMIT_DROP : _ret);            \
-               _ret;                                   \
-       })
-
  #ifdef CONFIG_BPF_SYSCALL
  DECLARE_PER_CPU(int, bpf_prog_active);
  extern struct mutex bpf_stats_enabled_mutex;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c

index 128028e..0cb6211 100644 (file)
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -22,6 +22,72 @@
  DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
  EXPORT_SYMBOL(cgroup_bpf_enabled_key);
  
+/* __always_inline is necessary to prevent indirect call through run_prog
+ * function pointer.
+ */
+static __always_inline int
+bpf_prog_run_array_cg_flags(const struct cgroup_bpf *cgrp,
+                           enum cgroup_bpf_attach_type atype,
+                           const void *ctx, bpf_prog_run_fn run_prog,
+                           int retval, u32 *ret_flags)
+{
+       const struct bpf_prog_array_item *item;
+       const struct bpf_prog *prog;
+       const struct bpf_prog_array *array;
+       struct bpf_run_ctx *old_run_ctx;
+       struct bpf_cg_run_ctx run_ctx;
+       u32 func_ret;
+
+       run_ctx.retval = retval;
+       migrate_disable();
+       rcu_read_lock();
+       array = rcu_dereference(cgrp->effective[atype]);
+       item = &array->items[0];
+       old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+       while ((prog = READ_ONCE(item->prog))) {
+               run_ctx.prog_item = item;
+               func_ret = run_prog(prog, ctx);
+               if (!(func_ret & 1) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
+               *(ret_flags) |= (func_ret >> 1);
+               item++;
+       }
+       bpf_reset_run_ctx(old_run_ctx);
+       rcu_read_unlock();
+       migrate_enable();
+       return run_ctx.retval;
+}
+
+static __always_inline int
+bpf_prog_run_array_cg(const struct cgroup_bpf *cgrp,
+                     enum cgroup_bpf_attach_type atype,
+                     const void *ctx, bpf_prog_run_fn run_prog,
+                     int retval)
+{
+       const struct bpf_prog_array_item *item;
+       const struct bpf_prog *prog;
+       const struct bpf_prog_array *array;
+       struct bpf_run_ctx *old_run_ctx;
+       struct bpf_cg_run_ctx run_ctx;
+
+       run_ctx.retval = retval;
+       migrate_disable();
+       rcu_read_lock();
+       array = rcu_dereference(cgrp->effective[atype]);
+       item = &array->items[0];
+       old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
+       while ((prog = READ_ONCE(item->prog))) {
+               run_ctx.prog_item = item;
+               if (!run_prog(prog, ctx) && !IS_ERR_VALUE((long)run_ctx.retval))
+                       run_ctx.retval = -EPERM;
+               item++;
+       }
+       bpf_reset_run_ctx(old_run_ctx);
+       rcu_read_unlock();
+       migrate_enable();
+       return run_ctx.retval;
+}
+
  void cgroup_bpf_offline(struct cgroup *cgrp)
  {
         cgroup_get(cgrp);
@@ -1075,11 +1141,38 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
         bpf_compute_and_save_data_end(skb, &saved_data_end);
  
         if (atype == CGROUP_INET_EGRESS) {
-               ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
-                       cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
+               u32 flags = 0;
+               bool cn;
+
+               ret = bpf_prog_run_array_cg_flags(
+                       &cgrp->bpf, atype,
+                       skb, __bpf_prog_run_save_cb, 0, &flags);
+
+               /* Return values of CGROUP EGRESS BPF programs are:
+                *   0: drop packet
+                *   1: keep packet
+                *   2: drop packet and cn
+                *   3: keep packet and cn
+                *
+                * The returned value is then converted to one of the NET_XMIT
+                * or an error code that is then interpreted as drop packet
+                * (and no cn):
+                *   0: NET_XMIT_SUCCESS  skb should be transmitted
+                *   1: NET_XMIT_DROP     skb should be dropped and cn
+                *   2: NET_XMIT_CN       skb should be transmitted and cn
+                *   3: -err              skb should be dropped
+                */
+
+               cn = flags & BPF_RET_SET_CN;
+               if (ret && !IS_ERR_VALUE((long)ret))
+                       ret = -EFAULT;
+               if (!ret)
+                       ret = (cn ? NET_XMIT_CN : NET_XMIT_SUCCESS);
+               else
+                       ret = (cn ? NET_XMIT_DROP : ret);
         } else {
-               ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
-                                           __bpf_prog_run_save_cb, 0);
+               ret = bpf_prog_run_array_cg(&cgrp->bpf, atype,
+                                           skb, __bpf_prog_run_save_cb, 0);
                 if (ret && !IS_ERR_VALUE((long)ret))
                         ret = -EFAULT;
         }
@@ -1109,8 +1202,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
  {
         struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
  
-       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
-                                    bpf_prog_run, 0);
+       return bpf_prog_run_array_cg(&cgrp->bpf, atype, sk, bpf_prog_run, 0);
  }
  EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
  
@@ -1155,8 +1247,8 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
         }
  
         cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-       return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
-                                          bpf_prog_run, 0, flags);
+       return bpf_prog_run_array_cg_flags(&cgrp->bpf, atype,
+                                          &ctx, bpf_prog_run, 0, flags);
  }
  EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
  
@@ -1182,8 +1274,8 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
  {
         struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
  
-       return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
-                                    bpf_prog_run, 0);
+       return bpf_prog_run_array_cg(&cgrp->bpf, atype, sock_ops, bpf_prog_run,
+                                    0);
  }
  EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
  
@@ -1200,8 +1292,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
  
         rcu_read_lock();
         cgrp = task_dfl_cgroup(current);
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-                                   bpf_prog_run, 0);
+       ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0);
         rcu_read_unlock();
  
         return ret;
@@ -1366,8 +1457,7 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
  
         rcu_read_lock();
         cgrp = task_dfl_cgroup(current);
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
-                                   bpf_prog_run, 0);
+       ret = bpf_prog_run_array_cg(&cgrp->bpf, atype, &ctx, bpf_prog_run, 0);
         rcu_read_unlock();
  
         kfree(ctx.cur_val);
@@ -1459,7 +1549,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
         }
  
         lock_sock(sk);
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
+       ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_SETSOCKOPT,
                                     &ctx, bpf_prog_run, 0);
         release_sock(sk);
  
@@ -1559,7 +1649,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
         }
  
         lock_sock(sk);
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+       ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
                                     &ctx, bpf_prog_run, retval);
         release_sock(sk);
  
@@ -1608,7 +1698,7 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
          * be called if that data shouldn't be "exported".
          */
  
-       ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
+       ret = bpf_prog_run_array_cg(&cgrp->bpf, CGROUP_GETSOCKOPT,
                                     &ctx, bpf_prog_run, retval);
         if (ret < 0)
                 return ret;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c

index b26f3da..f15b826 100644 (file)
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -129,7 +129,10 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
          * out of events when it was updated in between this and the
          * rcu_dereference() which is accepted risk.
          */
-       ret = BPF_PROG_RUN_ARRAY(call->prog_array, ctx, bpf_prog_run);
+       rcu_read_lock();
+       ret = bpf_prog_run_array(rcu_dereference(call->prog_array),
+                                ctx, bpf_prog_run);
+       rcu_read_unlock();
  
   out:
         __this_cpu_dec(bpf_prog_active);
author	Stanislav Fomichev <sdf@google.com>
	Thu, 14 Apr 2022 16:12:33 +0000 (09:12 -0700)
committer	Alexei Starovoitov <ast@kernel.org>
	Tue, 19 Apr 2022 16:45:47 +0000 (09:45 -0700)
drivers/media/rc/bpf-lirc.c		patch \| blob \| history
include/linux/bpf.h		patch \| blob \| history
kernel/bpf/cgroup.c		patch \| blob \| history
kernel/trace/bpf_trace.c		patch \| blob \| history