Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf

author Jakub Kicinski <kuba@kernel.org>
Thu, 22 Oct 2020 16:51:41 +0000 (09:51 -0700)
committer Jakub Kicinski <kuba@kernel.org>
Thu, 22 Oct 2020 16:51:41 +0000 (09:51 -0700)
diff --git a/MAINTAINERS b/MAINTAINERS

index 0f59b04..6d50cbf 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3244,7 +3244,8 @@ R:        KP Singh <kpsingh@chromium.org>
  L:     netdev@vger.kernel.org
  L:     bpf@vger.kernel.org
  S:     Supported
-Q:     https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
+W:     https://bpf.io/
+Q:     https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
  F:     Documentation/bpf/
diff --git a/include/linux/filter.h b/include/linux/filter.h

index 20fc24c..72d62cb 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -607,12 +607,21 @@ struct bpf_skb_data_end {
         void *data_end;
  };
  
+struct bpf_nh_params {
+       u32 nh_family;
+       union {
+               u32 ipv4_nh;
+               struct in6_addr ipv6_nh;
+       };
+};
+
  struct bpf_redirect_info {
         u32 flags;
         u32 tgt_index;
         void *tgt_value;
         struct bpf_map *map;
         u32 kern_flags;
+       struct bpf_nh_params nh;
  };
  
  DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h

index bf5a99d..e6ceac3 100644 (file)
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3677,15 +3677,19 @@ union bpf_attr {
   *     Return
   *             The id is returned or 0 in case the id could not be retrieved.
   *
- * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
   *     Description
   *             Redirect the packet to another net device of index *ifindex*
   *             and fill in L2 addresses from neighboring subsystem. This helper
   *             is somewhat similar to **bpf_redirect**\ (), except that it
   *             populates L2 addresses as well, meaning, internally, the helper
- *             performs a FIB lookup based on the skb's networking header to
- *             get the address of the next hop and then relies on the neighbor
- *             lookup for the L2 address of the nexthop.
+ *             relies on the neighbor lookup for the L2 address of the nexthop.
+ *
+ *             The helper will perform a FIB lookup based on the skb's
+ *             networking header to get the address of the next hop, unless
+ *             this is supplied by the caller in the *params* argument. The
+ *             *plen* argument indicates the len of *params* and should be set
+ *             to 0 if *params* is NULL.
   *
   *             The *flags* argument is reserved and must be 0. The helper is
   *             currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
         __u8    dmac[6];     /* ETH_ALEN */
  };
  
+struct bpf_redir_neigh {
+       /* network family for lookup (AF_INET, AF_INET6) */
+       __u32 nh_family;
+       /* network address of nexthop; skips fib lookup to find gateway */
+       union {
+               __be32          ipv4_nh;
+               __u32           ipv6_nh[4];  /* in6_addr; network order */
+       };
+};
+
  enum bpf_task_fd_type {
         BPF_FD_TYPE_RAW_TRACEPOINT,     /* tp name */
         BPF_FD_TYPE_TRACEPOINT,         /* tp name */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c

index 1110ecd..8f50c9c 100644 (file)
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2913,7 +2913,6 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
         case BPF_CGROUP_INET_INGRESS:
         case BPF_CGROUP_INET_EGRESS:
                 return BPF_PROG_TYPE_CGROUP_SKB;
-               break;
         case BPF_CGROUP_INET_SOCK_CREATE:
         case BPF_CGROUP_INET_SOCK_RELEASE:
         case BPF_CGROUP_INET4_POST_BIND:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c

index 39d7f44..6200519 100644 (file)
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5133,24 +5133,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                                 regs[BPF_REG_0].id = ++env->id_gen;
                 } else {
                         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
-                       regs[BPF_REG_0].id = ++env->id_gen;
                 }
         } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
                 mark_reg_known_zero(env, regs, BPF_REG_0);
                 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
-               regs[BPF_REG_0].id = ++env->id_gen;
         } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
                 mark_reg_known_zero(env, regs, BPF_REG_0);
                 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
-               regs[BPF_REG_0].id = ++env->id_gen;
         } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
                 mark_reg_known_zero(env, regs, BPF_REG_0);
                 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
-               regs[BPF_REG_0].id = ++env->id_gen;
         } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
                 mark_reg_known_zero(env, regs, BPF_REG_0);
                 regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
-               regs[BPF_REG_0].id = ++env->id_gen;
                 regs[BPF_REG_0].mem_size = meta.mem_size;
         } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
                    fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
@@ -5199,6 +5194,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
                 return -EINVAL;
         }
  
+       if (reg_type_may_be_null(regs[BPF_REG_0].type))
+               regs[BPF_REG_0].id = ++env->id_gen;
+
         if (is_ptr_cast_function(func_id)) {
                 /* For release_reference() */
                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
@@ -7212,7 +7210,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
                                  struct bpf_reg_state *reg, u32 id,
                                  bool is_null)
  {
-       if (reg_type_may_be_null(reg->type) && reg->id == id) {
+       if (reg_type_may_be_null(reg->type) && reg->id == id &&
+           !WARN_ON_ONCE(!reg->id)) {
                 /* Old offset (both fixed and variable parts) should
                  * have been known-zero, because we don't allow pointer
                  * arithmetic on pointers that might be NULL.
diff --git a/net/core/filter.c b/net/core/filter.c

index c5e2a1c..6d0fa65 100644 (file)
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
  }
  
  #if IS_ENABLED(CONFIG_IPV6)
-static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
+                           struct net_device *dev, struct bpf_nh_params *nh)
  {
-       struct dst_entry *dst = skb_dst(skb);
-       struct net_device *dev = dst->dev;
         u32 hh_len = LL_RESERVED_SPACE(dev);
         const struct in6_addr *nexthop;
+       struct dst_entry *dst = NULL;
         struct neighbour *neigh;
  
         if (dev_xmit_recursion()) {
@@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
         }
  
         rcu_read_lock_bh();
-       nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
-                             &ipv6_hdr(skb)->daddr);
+       if (!nh) {
+               dst = skb_dst(skb);
+               nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
+                                     &ipv6_hdr(skb)->daddr);
+       } else {
+               nexthop = &nh->ipv6_nh;
+       }
         neigh = ip_neigh_gw6(dev, nexthop);
         if (likely(!IS_ERR(neigh))) {
                 int ret;
@@ -2210,36 +2215,43 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb)
                 return ret;
         }
         rcu_read_unlock_bh();
-       IP6_INC_STATS(dev_net(dst->dev),
-                     ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+       if (dst)
+               IP6_INC_STATS(dev_net(dst->dev),
+                             ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
  out_drop:
         kfree_skb(skb);
         return -ENETDOWN;
  }
  
-static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+                                  struct bpf_nh_params *nh)
  {
         const struct ipv6hdr *ip6h = ipv6_hdr(skb);
         struct net *net = dev_net(dev);
         int err, ret = NET_XMIT_DROP;
-       struct dst_entry *dst;
-       struct flowi6 fl6 = {
-               .flowi6_flags   = FLOWI_FLAG_ANYSRC,
-               .flowi6_mark    = skb->mark,
-               .flowlabel      = ip6_flowinfo(ip6h),
-               .flowi6_oif     = dev->ifindex,
-               .flowi6_proto   = ip6h->nexthdr,
-               .daddr          = ip6h->daddr,
-               .saddr          = ip6h->saddr,
-       };
  
-       dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
-       if (IS_ERR(dst))
-               goto out_drop;
+       if (!nh) {
+               struct dst_entry *dst;
+               struct flowi6 fl6 = {
+                       .flowi6_flags = FLOWI_FLAG_ANYSRC,
+                       .flowi6_mark  = skb->mark,
+                       .flowlabel    = ip6_flowinfo(ip6h),
+                       .flowi6_oif   = dev->ifindex,
+                       .flowi6_proto = ip6h->nexthdr,
+                       .daddr        = ip6h->daddr,
+                       .saddr        = ip6h->saddr,
+               };
+
+               dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
+               if (IS_ERR(dst))
+                       goto out_drop;
  
-       skb_dst_set(skb, dst);
+               skb_dst_set(skb, dst);
+       } else if (nh->nh_family != AF_INET6) {
+               goto out_drop;
+       }
  
-       err = bpf_out_neigh_v6(net, skb);
+       err = bpf_out_neigh_v6(net, skb, dev, nh);
         if (unlikely(net_xmit_eval(err)))
                 dev->stats.tx_errors++;
         else
@@ -2252,7 +2264,8 @@ out_xmit:
         return ret;
  }
  #else
-static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
+                                  struct bpf_nh_params *nh)
  {
         kfree_skb(skb);
         return NET_XMIT_DROP;
@@ -2260,11 +2273,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev)
  #endif /* CONFIG_IPV6 */
  
  #if IS_ENABLED(CONFIG_INET)
-static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
+static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
+                           struct net_device *dev, struct bpf_nh_params *nh)
  {
-       struct dst_entry *dst = skb_dst(skb);
-       struct rtable *rt = container_of(dst, struct rtable, dst);
-       struct net_device *dev = dst->dev;
         u32 hh_len = LL_RESERVED_SPACE(dev);
         struct neighbour *neigh;
         bool is_v6gw = false;
@@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb)
         }
  
         rcu_read_lock_bh();
-       neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+       if (!nh) {
+               struct dst_entry *dst = skb_dst(skb);
+               struct rtable *rt = container_of(dst, struct rtable, dst);
+
+               neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
+       } else if (nh->nh_family == AF_INET6) {
+               neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
+               is_v6gw = true;
+       } else if (nh->nh_family == AF_INET) {
+               neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
+       } else {
+               rcu_read_unlock_bh();
+               goto out_drop;
+       }
+
         if (likely(!IS_ERR(neigh))) {
                 int ret;
  
@@ -2309,33 +2334,37 @@ out_drop:
         return -ENETDOWN;
  }
  
-static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+                                  struct bpf_nh_params *nh)
  {
         const struct iphdr *ip4h = ip_hdr(skb);
         struct net *net = dev_net(dev);
         int err, ret = NET_XMIT_DROP;
-       struct rtable *rt;
-       struct flowi4 fl4 = {
-               .flowi4_flags   = FLOWI_FLAG_ANYSRC,
-               .flowi4_mark    = skb->mark,
-               .flowi4_tos     = RT_TOS(ip4h->tos),
-               .flowi4_oif     = dev->ifindex,
-               .flowi4_proto   = ip4h->protocol,
-               .daddr          = ip4h->daddr,
-               .saddr          = ip4h->saddr,
-       };
  
-       rt = ip_route_output_flow(net, &fl4, NULL);
-       if (IS_ERR(rt))
-               goto out_drop;
-       if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
-               ip_rt_put(rt);
-               goto out_drop;
-       }
+       if (!nh) {
+               struct flowi4 fl4 = {
+                       .flowi4_flags = FLOWI_FLAG_ANYSRC,
+                       .flowi4_mark  = skb->mark,
+                       .flowi4_tos   = RT_TOS(ip4h->tos),
+                       .flowi4_oif   = dev->ifindex,
+                       .flowi4_proto = ip4h->protocol,
+                       .daddr        = ip4h->daddr,
+                       .saddr        = ip4h->saddr,
+               };
+               struct rtable *rt;
+
+               rt = ip_route_output_flow(net, &fl4, NULL);
+               if (IS_ERR(rt))
+                       goto out_drop;
+               if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
+                       ip_rt_put(rt);
+                       goto out_drop;
+               }
  
-       skb_dst_set(skb, &rt->dst);
+               skb_dst_set(skb, &rt->dst);
+       }
  
-       err = bpf_out_neigh_v4(net, skb);
+       err = bpf_out_neigh_v4(net, skb, dev, nh);
         if (unlikely(net_xmit_eval(err)))
                 dev->stats.tx_errors++;
         else
@@ -2348,14 +2377,16 @@ out_xmit:
         return ret;
  }
  #else
-static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
+                                  struct bpf_nh_params *nh)
  {
         kfree_skb(skb);
         return NET_XMIT_DROP;
  }
  #endif /* CONFIG_INET */
  
-static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
+static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
+                               struct bpf_nh_params *nh)
  {
         struct ethhdr *ethh = eth_hdr(skb);
  
@@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev)
         skb_reset_network_header(skb);
  
         if (skb->protocol == htons(ETH_P_IP))
-               return __bpf_redirect_neigh_v4(skb, dev);
+               return __bpf_redirect_neigh_v4(skb, dev, nh);
         else if (skb->protocol == htons(ETH_P_IPV6))
-               return __bpf_redirect_neigh_v6(skb, dev);
+               return __bpf_redirect_neigh_v6(skb, dev, nh);
  out:
         kfree_skb(skb);
         return -ENOTSUPP;
@@ -2382,7 +2413,8 @@ out:
  enum {
         BPF_F_NEIGH     = (1ULL << 1),
         BPF_F_PEER      = (1ULL << 2),
-#define BPF_F_REDIRECT_INTERNAL        (BPF_F_NEIGH | BPF_F_PEER)
+       BPF_F_NEXTHOP   = (1ULL << 3),
+#define BPF_F_REDIRECT_INTERNAL        (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
  };
  
  BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
@@ -2455,7 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb)
                 return -EAGAIN;
         }
         return flags & BPF_F_NEIGH ?
-              __bpf_redirect_neigh(skb, dev) :
+              __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
+                                   &ri->nh : NULL) :
                __bpf_redirect(skb, dev, flags);
  out_drop:
         kfree_skb(skb);
@@ -2504,16 +2537,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = {
         .arg2_type      = ARG_ANYTHING,
  };
  
-BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags)
+BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
+          int, plen, u64, flags)
  {
         struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
  
-       if (unlikely(flags))
+       if (unlikely((plen && plen < sizeof(*params)) || flags))
                 return TC_ACT_SHOT;
  
-       ri->flags = BPF_F_NEIGH;
+       ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
         ri->tgt_index = ifindex;
  
+       BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
+       if (plen)
+               memcpy(&ri->nh, params, sizeof(ri->nh));
+
         return TC_ACT_REDIRECT;
  }
  
@@ -2522,7 +2560,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = {
         .gpl_only       = false,
         .ret_type       = RET_INTEGER,
         .arg1_type      = ARG_ANYTHING,
-       .arg2_type      = ARG_ANYTHING,
+       .arg2_type      = ARG_PTR_TO_MEM_OR_NULL,
+       .arg3_type      = ARG_CONST_SIZE_OR_ZERO,
+       .arg4_type      = ARG_ANYTHING,
  };
  
  BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c

index 8142d02..b363503 100644 (file)
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -44,17 +44,17 @@ static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
         switch (proto) {
         case ETH_P_8021Q:
         case ETH_P_8021AD:
-               bpf_tail_call_static(skb, &jmp_table, PARSE_VLAN);
+               bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
                 break;
         case ETH_P_MPLS_UC:
         case ETH_P_MPLS_MC:
-               bpf_tail_call_static(skb, &jmp_table, PARSE_MPLS);
+               bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
                 break;
         case ETH_P_IP:
-               bpf_tail_call_static(skb, &jmp_table, PARSE_IP);
+               bpf_tail_call(skb, &jmp_table, PARSE_IP);
                 break;
         case ETH_P_IPV6:
-               bpf_tail_call_static(skb, &jmp_table, PARSE_IPV6);
+               bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
                 break;
         }
  }
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py

index 7d86fdd..6769caa 100755 (executable)
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -453,6 +453,7 @@ class PrinterHelpers(Printer):
              'struct bpf_perf_event_data',
              'struct bpf_perf_event_value',
              'struct bpf_pidns_info',
+            'struct bpf_redir_neigh',
              'struct bpf_sk_lookup',
              'struct bpf_sock',
              'struct bpf_sock_addr',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h

index bf5a99d..e6ceac3 100644 (file)
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3677,15 +3677,19 @@ union bpf_attr {
   *     Return
   *             The id is returned or 0 in case the id could not be retrieved.
   *
- * long bpf_redirect_neigh(u32 ifindex, u64 flags)
+ * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags)
   *     Description
   *             Redirect the packet to another net device of index *ifindex*
   *             and fill in L2 addresses from neighboring subsystem. This helper
   *             is somewhat similar to **bpf_redirect**\ (), except that it
   *             populates L2 addresses as well, meaning, internally, the helper
- *             performs a FIB lookup based on the skb's networking header to
- *             get the address of the next hop and then relies on the neighbor
- *             lookup for the L2 address of the nexthop.
+ *             relies on the neighbor lookup for the L2 address of the nexthop.
+ *
+ *             The helper will perform a FIB lookup based on the skb's
+ *             networking header to get the address of the next hop, unless
+ *             this is supplied by the caller in the *params* argument. The
+ *             *plen* argument indicates the len of *params* and should be set
+ *             to 0 if *params* is NULL.
   *
   *             The *flags* argument is reserved and must be 0. The helper is
   *             currently only supported for tc BPF program types, and enabled
@@ -4906,6 +4910,16 @@ struct bpf_fib_lookup {
         __u8    dmac[6];     /* ETH_ALEN */
  };
  
+struct bpf_redir_neigh {
+       /* network family for lookup (AF_INET, AF_INET6) */
+       __u32 nh_family;
+       /* network address of nexthop; skips fib lookup to find gateway */
+       union {
+               __be32          ipv4_nh;
+               __u32           ipv6_nh[4];  /* in6_addr; network order */
+       };
+};
+
  enum bpf_task_fd_type {
         BPF_FD_TYPE_RAW_TRACEPOINT,     /* tp name */
         BPF_FD_TYPE_TRACEPOINT,         /* tp name */
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h

index 2bdb7d6..72b2511 100644 (file)
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -72,6 +72,7 @@
  /*
   * Helper function to perform a tail call with a constant/immediate map slot.
   */
+#if __clang_major__ >= 8 && defined(__bpf__)
  static __always_inline void
  bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
  {
@@ -98,6 +99,7 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
                      :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
                      : "r0", "r1", "r2", "r3", "r4", "r5");
  }
+#endif
  
  /*
   * Helper structure used by eBPF C program
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c

index 28e26bd..b58b775 100644 (file)
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -5,18 +5,17 @@
  #include <bpf/libbpf.h>
  #include <bpf/btf.h>
  #include "test_ksyms_btf.skel.h"
+#include "test_ksyms_btf_null_check.skel.h"
  
  static int duration;
  
-void test_ksyms_btf(void)
+static void test_basic(void)
  {
         __u64 runqueues_addr, bpf_prog_active_addr;
         __u32 this_rq_cpu;
         int this_bpf_prog_active;
         struct test_ksyms_btf *skel = NULL;
         struct test_ksyms_btf__data *data;
-       struct btf *btf;
-       int percpu_datasec;
         int err;
  
         err = kallsyms_find("runqueues", &runqueues_addr);
@@ -31,20 +30,6 @@ void test_ksyms_btf(void)
         if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
                 return;
  
-       btf = libbpf_find_kernel_btf();
-       if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
-                 PTR_ERR(btf)))
-               return;
-
-       percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
-                                               BTF_KIND_DATASEC);
-       if (percpu_datasec < 0) {
-               printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
-                      __func__);
-               test__skip();
-               goto cleanup;
-       }
-
         skel = test_ksyms_btf__open_and_load();
         if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
                 goto cleanup;
@@ -83,6 +68,42 @@ void test_ksyms_btf(void)
               data->out__bpf_prog_active);
  
  cleanup:
-       btf__free(btf);
         test_ksyms_btf__destroy(skel);
  }
+
+static void test_null_check(void)
+{
+       struct test_ksyms_btf_null_check *skel;
+
+       skel = test_ksyms_btf_null_check__open_and_load();
+       CHECK(skel, "skel_open", "unexpected load of a prog missing null check\n");
+
+       test_ksyms_btf_null_check__destroy(skel);
+}
+
+void test_ksyms_btf(void)
+{
+       int percpu_datasec;
+       struct btf *btf;
+
+       btf = libbpf_find_kernel_btf();
+       if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
+                 PTR_ERR(btf)))
+               return;
+
+       percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
+                                               BTF_KIND_DATASEC);
+       btf__free(btf);
+       if (percpu_datasec < 0) {
+               printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
+                      __func__);
+               test__skip();
+               return;
+       }
+
+       if (test__start_subtest("basic"))
+               test_basic();
+
+       if (test__start_subtest("null_check"))
+               test_null_check();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c

new file mode 100644 (file)

index 0000000..8bc8f7c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+extern const struct rq runqueues __ksym; /* struct type global var. */
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+       struct rq *rq;
+       int *active;
+       __u32 cpu;
+
+       cpu = bpf_get_smp_processor_id();
+       rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
+       active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+       if (active) {
+               /* READ_ONCE */
+               *(volatile int *)active;
+               /* !rq has not been tested, so verifier should reject. */
+               *(volatile int *)(&rq->cpu);
+       }
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c

index fe18261..b985ac4 100644 (file)
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -1,4 +1,5 @@
  // SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
  #include <stdint.h>
  #include <stdbool.h>
  
@@ -118,7 +119,7 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
         if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
                 return TC_ACT_SHOT;
  
-       return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0);
+       return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0);
  }
  
  SEC("src_ingress") int tc_src(struct __sk_buff *skb)
@@ -142,7 +143,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb)
         if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
                 return TC_ACT_SHOT;
  
-       return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0);
+       return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0);
  }
  
  char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c

new file mode 100644 (file)

index 0000000..d82ed34
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef ctx_ptr
+# define ctx_ptr(field)                (void *)(long)(field)
+#endif
+
+#define AF_INET 2
+#define AF_INET6 10
+
+static __always_inline int fill_fib_params_v4(struct __sk_buff *skb,
+                                             struct bpf_fib_lookup *fib_params)
+{
+       void *data_end = ctx_ptr(skb->data_end);
+       void *data = ctx_ptr(skb->data);
+       struct iphdr *ip4h;
+
+       if (data + sizeof(struct ethhdr) > data_end)
+               return -1;
+
+       ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+       if ((void *)(ip4h + 1) > data_end)
+               return -1;
+
+       fib_params->family = AF_INET;
+       fib_params->tos = ip4h->tos;
+       fib_params->l4_protocol = ip4h->protocol;
+       fib_params->sport = 0;
+       fib_params->dport = 0;
+       fib_params->tot_len = bpf_ntohs(ip4h->tot_len);
+       fib_params->ipv4_src = ip4h->saddr;
+       fib_params->ipv4_dst = ip4h->daddr;
+
+       return 0;
+}
+
+static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
+                                             struct bpf_fib_lookup *fib_params)
+{
+       struct in6_addr *src = (struct in6_addr *)fib_params->ipv6_src;
+       struct in6_addr *dst = (struct in6_addr *)fib_params->ipv6_dst;
+       void *data_end = ctx_ptr(skb->data_end);
+       void *data = ctx_ptr(skb->data);
+       struct ipv6hdr *ip6h;
+
+       if (data + sizeof(struct ethhdr) > data_end)
+               return -1;
+
+       ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+       if ((void *)(ip6h + 1) > data_end)
+               return -1;
+
+       fib_params->family = AF_INET6;
+       fib_params->flowinfo = 0;
+       fib_params->l4_protocol = ip6h->nexthdr;
+       fib_params->sport = 0;
+       fib_params->dport = 0;
+       fib_params->tot_len = bpf_ntohs(ip6h->payload_len);
+       *src = ip6h->saddr;
+       *dst = ip6h->daddr;
+
+       return 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+       void *data_end = ctx_ptr(skb->data_end);
+       void *data = ctx_ptr(skb->data);
+       __u32 *raw = data;
+
+       if (data + sizeof(struct ethhdr) > data_end)
+               return TC_ACT_SHOT;
+
+       return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+static __always_inline int tc_redir(struct __sk_buff *skb)
+{
+       struct bpf_fib_lookup fib_params = { .ifindex = skb->ingress_ifindex };
+       __u8 zero[ETH_ALEN * 2];
+       int ret = -1;
+
+       switch (skb->protocol) {
+       case __bpf_constant_htons(ETH_P_IP):
+               ret = fill_fib_params_v4(skb, &fib_params);
+               break;
+       case __bpf_constant_htons(ETH_P_IPV6):
+               ret = fill_fib_params_v6(skb, &fib_params);
+               break;
+       }
+
+       if (ret)
+               return TC_ACT_OK;
+
+       ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), 0);
+       if (ret == BPF_FIB_LKUP_RET_NOT_FWDED || ret < 0)
+               return TC_ACT_OK;
+
+       __builtin_memset(&zero, 0, sizeof(zero));
+       if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+               return TC_ACT_SHOT;
+
+       if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) {
+               struct bpf_redir_neigh nh_params = {};
+
+               nh_params.nh_family = fib_params.family;
+               __builtin_memcpy(&nh_params.ipv6_nh, &fib_params.ipv6_dst,
+                                sizeof(nh_params.ipv6_nh));
+
+               return bpf_redirect_neigh(fib_params.ifindex, &nh_params,
+                                         sizeof(nh_params), 0);
+
+       } else if (ret == BPF_FIB_LKUP_RET_SUCCESS) {
+               void *data_end = ctx_ptr(skb->data_end);
+               struct ethhdr *eth = ctx_ptr(skb->data);
+
+               if (eth + 1 > data_end)
+                       return TC_ACT_SHOT;
+
+               __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+               __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+
+               return bpf_redirect(fib_params.ifindex, 0);
+       }
+
+       return TC_ACT_SHOT;
+}
+
+/* these are identical, but keep them separate for compatibility with the
+ * section names expected by test_tc_redirect.sh
+ */
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+       return tc_redir(skb);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+       return tc_redir(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh

index 6d74825..8868aa1 100755 (executable)
--- a/tools/testing/selftests/bpf/test_tc_redirect.sh
+++ b/tools/testing/selftests/bpf/test_tc_redirect.sh
@@ -24,8 +24,7 @@ command -v timeout >/dev/null 2>&1 || \
         { echo >&2 "timeout is not available"; exit 1; }
  command -v ping >/dev/null 2>&1 || \
         { echo >&2 "ping is not available"; exit 1; }
-command -v ping6 >/dev/null 2>&1 || \
-       { echo >&2 "ping6 is not available"; exit 1; }
+if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi
  command -v perl >/dev/null 2>&1 || \
         { echo >&2 "perl is not available"; exit 1; }
  command -v jq >/dev/null 2>&1 || \
@@ -152,7 +151,7 @@ netns_test_connectivity()
         echo -e "${TEST}: ${GREEN}PASS${NC}"
  
         TEST="ICMPv6 connectivity test"
-       ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
+       ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST}
         if [ $? -ne 0 ]; then
                 echo -e "${TEST}: ${RED}FAIL${NC}"
                 exit 1
@@ -170,6 +169,7 @@ hex_mem_str()
  netns_setup_bpf()
  {
         local obj=$1
+       local use_forwarding=${2:-0}
  
         ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
         ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
@@ -179,6 +179,14 @@ netns_setup_bpf()
         ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
         ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress  bpf da obj $obj sec chk_egress
  
+       if [ "$use_forwarding" -eq "1" ]; then
+               # bpf_fib_lookup() checks if forwarding is enabled
+               ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1
+               ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1
+               ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1
+               return 0
+       fi
+
         veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
         veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
  
@@ -200,5 +208,9 @@ netns_setup_bpf test_tc_neigh.o
  netns_test_connectivity
  netns_cleanup
  netns_setup
+netns_setup_bpf test_tc_neigh_fib.o 1
+netns_test_connectivity
+netns_cleanup
+netns_setup
  netns_setup_bpf test_tc_peer.o
  netns_test_connectivity
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c

index b1aac26..ce13ece 100644 (file)
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -631,3 +631,28 @@
         .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
         .result = ACCEPT,
  },
+{
+       "mark null check on return value of bpf_skc_to helpers",
+       .insns = {
+       BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+       BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+       BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+       BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_request_sock),
+       BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+       BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0, 2),
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+       BPF_EXIT_INSN(),
+       },
+       .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+       .result = REJECT,
+       .errstr = "invalid mem access",
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "unknown func",
+},
author	Jakub Kicinski <kuba@kernel.org>
	Thu, 22 Oct 2020 16:51:41 +0000 (09:51 -0700)
committer	Jakub Kicinski <kuba@kernel.org>
	Thu, 22 Oct 2020 16:51:41 +0000 (09:51 -0700)
MAINTAINERS		patch \| blob \| history
include/linux/filter.h		patch \| blob \| history
include/uapi/linux/bpf.h		patch \| blob \| history
kernel/bpf/syscall.c		patch \| blob \| history
kernel/bpf/verifier.c		patch \| blob \| history
net/core/filter.c		patch \| blob \| history
samples/bpf/sockex3_kern.c		patch \| blob \| history
scripts/bpf_helpers_doc.py		patch \| blob \| history
tools/include/uapi/linux/bpf.h		patch \| blob \| history
tools/lib/bpf/bpf_helpers.h		patch \| blob \| history
tools/testing/selftests/bpf/prog_tests/ksyms_btf.c		patch \| blob \| history
tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/progs/test_tc_neigh.c		patch \| blob \| history
tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c	[new file with mode: 0644]	patch \| blob
tools/testing/selftests/bpf/test_tc_redirect.sh		patch \| blob \| history
tools/testing/selftests/bpf/verifier/sock.c		patch \| blob \| history