Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author David S. Miller <davem@davemloft.net>
Thu, 17 Jun 2021 18:54:56 +0000 (11:54 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 17 Jun 2021 18:54:56 +0000 (11:54 -0700)
Daniel Borkmann says:

====================
pull-request: bpf-next 2021-06-17

The following pull-request contains BPF updates for your *net-next* tree.

We've added 50 non-merge commits during the last 25 day(s) which contain
a total of 148 files changed, 4779 insertions(+), 1248 deletions(-).

The main changes are:

1) BPF infrastructure to migrate TCP child sockets from one listener to
   another in the same reuseport group/map, from Kuniyuki Iwashima.

2) Add a provably sound, faster and more precise algorithm for tnum_mul(),
   as described in https://arxiv.org/abs/2105.05398, from Harishankar Vishwanathan.

3) Streamline libbpf's error reporting as planned out in the
   'libbpf: the road to v1.0' effort, from Andrii Nakryiko.

4) Add broadcast support to xdp_redirect_map(), from Hangbin Liu; see the
   usage sketch after this summary.

5) Extend bpf_map_lookup_and_delete_elem() functionality to four more map
   types, that is, {LRU_,PERCPU_,LRU_PERCPU_,}HASH, from Denis Salopek.

6) Support new LLVM relocations in libbpf to make them more linker friendly;
   also add a document describing the BPF backend relocations, from Yonghong Song.

7) Silence long-standing KUBSAN complaints on register-based shifts in the
   interpreter, from Daniel Borkmann and Eric Biggers.

8) Add dummy PT_REGS macros in libbpf to fail BPF program compilation when
   the target arch cannot be determined, from Lorenz Bauer.

9) Extend AF_XDP to support large umems with 1M+ pages, from Magnus Karlsson.

10) Fix two minor libbpf tc BPF API issues, from Kumar Kartikeya Dwivedi.

11) Move the libbpf BPF_SEQ_PRINTF/BPF_SNPRINTF macros that can be used by
    BPF programs to the bpf_helpers.h header, from Florent Revest.
====================
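
For 4), the following is a minimal sketch of how an XDP program might use
the new broadcast flags (map name and sizing are illustrative, modeled on
the new samples/bpf/xdp_redirect_map_multi_kern.c):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
          __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
          __uint(key_size, sizeof(int));
          __uint(value_size, sizeof(int));
          __uint(max_entries, 32);
  } forward_map SEC(".maps");

  SEC("xdp")
  int xdp_redirect_broadcast(struct xdp_md *ctx)
  {
          /* Clone the frame to every device in forward_map except the
           * interface the packet arrived on.
           */
          return bpf_redirect_map(&forward_map, 0,
                                  BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
  }

  char _license[] SEC("license") = "GPL";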

Signed-off-by: David S. Miller <davem@davemloft.net>
146 files changed:
Documentation/bpf/index.rst
Documentation/bpf/llvm_reloc.rst [new file with mode: 0644]
Documentation/networking/ip-sysctl.rst
include/linux/bpf.h
include/linux/bpf_local_storage.h
include/linux/filter.h
include/net/netns/ipv4.h
include/net/sock_reuseport.h
include/net/xdp.h
include/trace/events/xdp.h
include/uapi/linux/bpf.h
kernel/bpf/bpf_inode_storage.c
kernel/bpf/bpf_lsm.c
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/devmap.c
kernel/bpf/hashtab.c
kernel/bpf/preload/iterators/iterators.bpf.c
kernel/bpf/reuseport_array.c
kernel/bpf/syscall.c
kernel/bpf/tnum.c
kernel/bpf/trampoline.c
kernel/bpf/verifier.c
net/core/filter.c
net/core/sock_reuseport.c
net/core/xdp.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_hashtables.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv6/tcp_ipv6.c
net/xdp/xdp_umem.c
net/xdp/xskmap.c
samples/bpf/Makefile
samples/bpf/ibumad_kern.c
samples/bpf/ibumad_user.c
samples/bpf/xdp_fwd_user.c
samples/bpf/xdp_redirect_map_multi_kern.c [new file with mode: 0644]
samples/bpf/xdp_redirect_map_multi_user.c [new file with mode: 0644]
samples/bpf/xdp_sample_pkts_user.c
tools/bpf/bpftool/Makefile
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/main.c
tools/include/uapi/linux/bpf.h
tools/lib/bpf/Makefile
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/bpf_prog_linfo.c
tools/lib/bpf/bpf_tracing.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf_dump.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_errno.c
tools/lib/bpf/libbpf_internal.h
tools/lib/bpf/libbpf_legacy.h [new file with mode: 0644]
tools/lib/bpf/linker.c
tools/lib/bpf/netlink.c
tools/lib/bpf/ringbuf.c
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/Makefile.docs
tools/testing/selftests/bpf/README.rst
tools/testing/selftests/bpf/bench.c
tools/testing/selftests/bpf/benchs/bench_rename.c
tools/testing/selftests/bpf/benchs/bench_ringbufs.c
tools/testing/selftests/bpf/benchs/bench_trigger.c
tools/testing/selftests/bpf/prog_tests/attach_probe.c
tools/testing/selftests/bpf/prog_tests/bpf_iter.c
tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/btf_dump.c
tools/testing/selftests/bpf/prog_tests/btf_write.c
tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
tools/testing/selftests/bpf/prog_tests/cgroup_link.c
tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
tools/testing/selftests/bpf/prog_tests/check_mtu.c
tools/testing/selftests/bpf/prog_tests/core_reloc.c
tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/flow_dissector.c
tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
tools/testing/selftests/bpf/prog_tests/hashmap.c
tools/testing/selftests/bpf/prog_tests/kfree_skb.c
tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
tools/testing/selftests/bpf/prog_tests/link_pinning.c
tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/obj_name.c
tools/testing/selftests/bpf/prog_tests/perf_branches.c
tools/testing/selftests/bpf/prog_tests/perf_buffer.c
tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
tools/testing/selftests/bpf/prog_tests/probe_user.c
tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
tools/testing/selftests/bpf/prog_tests/reference_tracking.c
tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
tools/testing/selftests/bpf/prog_tests/select_reuseport.c
tools/testing/selftests/bpf/prog_tests/send_signal.c
tools/testing/selftests/bpf/prog_tests/sk_lookup.c
tools/testing/selftests/bpf/prog_tests/sock_fields.c
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
tools/testing/selftests/bpf/prog_tests/test_overhead.c
tools/testing/selftests/bpf/prog_tests/trampoline_count.c
tools/testing/selftests/bpf/prog_tests/udp_limit.c
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/xdp_link.c
tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
tools/testing/selftests/bpf/progs/bpf_iter_task.c
tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
tools/testing/selftests/bpf/progs/test_lookup_and_delete.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_migrate_reuseport.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_snprintf.c
tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_doc_build.sh
tools/testing/selftests/bpf/test_lru_map.c
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_progs.h
tools/testing/selftests/bpf/test_tcpnotify_user.c
tools/testing/selftests/bpf/test_xdp_redirect_multi.sh [new file with mode: 0755]
tools/testing/selftests/bpf/xdp_redirect_multi.c [new file with mode: 0644]

index a702f67..93e8cf1 100644 (file)
@@ -84,6 +84,7 @@ Other
    :maxdepth: 1
 
    ringbuf
+   llvm_reloc
 
 .. Links:
 .. _networking-filter: ../networking/filter.rst
diff --git a/Documentation/bpf/llvm_reloc.rst b/Documentation/bpf/llvm_reloc.rst
new file mode 100644 (file)
index 0000000..ca8957d
--- /dev/null
@@ -0,0 +1,240 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+====================
+BPF LLVM Relocations
+====================
+
+This document describes LLVM BPF backend relocation types.
+
+Relocation Record
+=================
+
+The LLVM BPF backend records each relocation with the following 16-byte
+ELF structure::
+
+  typedef struct
+  {
+    Elf64_Addr    r_offset;  // Offset from the beginning of section.
+    Elf64_Xword   r_info;    // Relocation type and symbol index.
+  } Elf64_Rel;
+
+For example, for the following code::
+
+  int g1 __attribute__((section("sec")));
+  int g2 __attribute__((section("sec")));
+  static volatile int l1 __attribute__((section("sec")));
+  static volatile int l2 __attribute__((section("sec")));
+  int test() {
+    return g1 + g2 + l1 + l2;
+  }
+
+Compiled with ``clang -target bpf -O2 -c test.c``, the following is
+the code as disassembled by ``llvm-objdump -dr test.o``::
+
+       0:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
+                0000000000000000:  R_BPF_64_64  g1
+       2:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+       3:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
+                0000000000000018:  R_BPF_64_64  g2
+       5:       61 20 00 00 00 00 00 00 r0 = *(u32 *)(r2 + 0)
+       6:       0f 10 00 00 00 00 00 00 r0 += r1
+       7:       18 01 00 00 08 00 00 00 00 00 00 00 00 00 00 00 r1 = 8 ll
+                0000000000000038:  R_BPF_64_64  sec
+       9:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+      10:       0f 10 00 00 00 00 00 00 r0 += r1
+      11:       18 01 00 00 0c 00 00 00 00 00 00 00 00 00 00 00 r1 = 12 ll
+                0000000000000058:  R_BPF_64_64  sec
+      13:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+      14:       0f 10 00 00 00 00 00 00 r0 += r1
+      15:       95 00 00 00 00 00 00 00 exit
+
+There are four relocations above, one for each of the four ``LD_imm64``
+instructions. The following ``llvm-readelf -r test.o`` output shows the
+binary values of the four relocations::
+
+  Relocation section '.rel.text' at offset 0x190 contains 4 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000000  0000000600000001 R_BPF_64_64            0000000000000000 g1
+  0000000000000018  0000000700000001 R_BPF_64_64            0000000000000004 g2
+  0000000000000038  0000000400000001 R_BPF_64_64            0000000000000000 sec
+  0000000000000058  0000000400000001 R_BPF_64_64            0000000000000000 sec
+
+Each relocation is represented by ``Offset`` (8 bytes) and ``Info`` (8 bytes).
+For example, the first relocation corresponds to the first instruction
+(Offset 0x0), and its ``Info`` encodes the relocation type
+``R_BPF_64_64`` (type 1) and the symbol table entry (entry 6).
+The following is the symbol table with ``llvm-readelf -s test.o``::
+
+  Symbol table '.symtab' contains 8 entries:
+     Num:    Value          Size Type    Bind   Vis       Ndx Name
+       0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT   UND
+       1: 0000000000000000     0 FILE    LOCAL  DEFAULT   ABS test.c
+       2: 0000000000000008     4 OBJECT  LOCAL  DEFAULT     4 l1
+       3: 000000000000000c     4 OBJECT  LOCAL  DEFAULT     4 l2
+       4: 0000000000000000     0 SECTION LOCAL  DEFAULT     4 sec
+       5: 0000000000000000   128 FUNC    GLOBAL DEFAULT     2 test
+       6: 0000000000000000     4 OBJECT  GLOBAL DEFAULT     4 g1
+       7: 0000000000000004     4 OBJECT  GLOBAL DEFAULT     4 g2
+
+The 6th entry is global variable ``g1`` with value 0.
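+
+As a cross-check, the ``Info`` word splits into the symbol table index
+(upper 32 bits) and the relocation type (lower 32 bits). A minimal
+host-side C sketch using the standard ``<elf.h>`` macros (illustrative
+only, not part of the BPF object)::
+
+  #include <elf.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+      /* Info word of the first relocation above. */
+      Elf64_Xword info = 0x0000000600000001;
+
+      printf("symbol index: %lu, type: %lu\n",
+             (unsigned long)ELF64_R_SYM(info),   /* 6 -> g1 */
+             (unsigned long)ELF64_R_TYPE(info)); /* 1 -> R_BPF_64_64 */
+      return 0;
+  }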
+
+Similarly, the second relocation is at ``.text`` offset ``0x18``, instruction 3,
+for the global variable ``g2``, which has symbol value 4, the offset
+from the start of the ``sec`` section.
+
+The third and fourth relocations refer to the static variables ``l1``
+and ``l2``. From the ``.rel.text`` section above, it is not clear
+which symbols they really refer to, as they both refer to
+symbol table entry 4, symbol ``sec``, which has type ``STT_SECTION``
+and represents a section. So for a static variable or function,
+the section offset is written to the original insn
+buffer; this value is called ``A`` (the addend). Looking at
+insns ``7`` and ``11`` above, they have section offsets ``8`` and ``12``.
+From the symbol table, we can see that these correspond to entries ``2``
+and ``3``, i.e. ``l1`` and ``l2``.
+
+In general, ``A`` is 0 for global variables and functions, and is
+the section offset, or some value computed from the section offset,
+for static variables/functions. The non-section-offset
+case applies to function calls; see below for more details.
+
+Different Relocation Types
+==========================
+
+Six relocation types are supported. The following is an overview, where
+``S`` represents the value of the symbol in the symbol table::
+
+  Enum  ELF Reloc Type     Description      BitSize  Offset        Calculation
+  0     R_BPF_NONE         None
+  1     R_BPF_64_64        ld_imm64 insn    32       r_offset + 4  S + A
+  2     R_BPF_64_ABS64     normal data      64       r_offset      S + A
+  3     R_BPF_64_ABS32     normal data      32       r_offset      S + A
+  4     R_BPF_64_NODYLD32  .BTF[.ext] data  32       r_offset      S + A
+  10    R_BPF_64_32        call insn        32       r_offset + 4  (S + A) / 8 - 1
+
+For example, the ``R_BPF_64_64`` relocation type is used for ``ld_imm64``
+instructions. The actual to-be-relocated data (0 or the section offset)
+is stored at ``r_offset + 4`` and the read/write
+data bitsize is 32 (4 bytes). The relocation is resolved with
+the symbol value plus the implicit addend. Note that ``BitSize`` is 32, which
+means the section offset must be less than or equal to ``UINT32_MAX``; this
+is enforced by the LLVM BPF backend.
+
+The ``R_BPF_64_ABS64`` relocation type is used for normal 64-bit data.
+The actual to-be-relocated data is stored at ``r_offset`` and the read/write data
+bitsize is 64 (8 bytes). The relocation is resolved with
+the symbol value plus the implicit addend.
+
+Both the ``R_BPF_64_ABS32`` and ``R_BPF_64_NODYLD32`` types are for 32-bit data,
+but ``R_BPF_64_NODYLD32`` specifically refers to relocations in the ``.BTF`` and
+``.BTF.ext`` sections. For cases like bcc, where the LLVM ``ExecutionEngine``
+``RuntimeDyld`` is involved, ``R_BPF_64_NODYLD32`` relocations should not be resolved
+to an actual function/variable address; otherwise, ``.BTF`` and ``.BTF.ext``
+become unusable by bcc and the kernel.
+
+The ``R_BPF_64_32`` type is used for call instructions. The call target's section
+offset is stored at ``r_offset + 4`` (32-bit) and is calculated as
+``(S + A) / 8 - 1``.
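+
+To make the ``Calculation`` column concrete, the following is a minimal
+sketch of how a hypothetical loader could apply the two instruction
+relocation types (illustrative only, not how libbpf implements it)::
+
+  #include <stdint.h>
+  #include <string.h>
+
+  /* sec points at a writable copy of the section being relocated and
+   * sym_value is S, the symbol value from the symbol table. Only the
+   * low 32 bits are touched, per the BitSize column above.
+   */
+  static void apply_bpf_reloc(uint8_t *sec, uint64_t r_offset,
+                              uint32_t type, uint32_t sym_value)
+  {
+      uint32_t imm;
+
+      memcpy(&imm, sec + r_offset + 4, sizeof(imm)); /* implicit addend A */
+      if (type == 1)         /* R_BPF_64_64: ld_imm64, S + A */
+          imm = sym_value + imm;
+      else if (type == 10)   /* R_BPF_64_32: call, (S + A) / 8 - 1 */
+          imm = (sym_value + imm) / 8 - 1;
+      memcpy(sec + r_offset + 4, &imm, sizeof(imm));
+  }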
+
+Examples
+========
+
+Types ``R_BPF_64_64`` and ``R_BPF_64_32`` are used to resolve ``ld_imm64``
+and ``call`` instructions. For example::
+
+  __attribute__((noinline)) __attribute__((section("sec1")))
+  int gfunc(int a, int b) {
+    return a * b;
+  }
+  static __attribute__((noinline)) __attribute__((section("sec1")))
+  int lfunc(int a, int b) {
+    return a + b;
+  }
+  int global __attribute__((section("sec2")));
+  int test(int a, int b) {
+    return gfunc(a, b) +  lfunc(a, b) + global;
+  }
+
+Compiled with ``clang -target bpf -O2 -c test.c``, we get the
+following code with ``llvm-objdump -dr test.o``::
+
+  Disassembly of section .text:
+
+  0000000000000000 <test>:
+         0:       bf 26 00 00 00 00 00 00 r6 = r2
+         1:       bf 17 00 00 00 00 00 00 r7 = r1
+         2:       85 10 00 00 ff ff ff ff call -1
+                  0000000000000010:  R_BPF_64_32  gfunc
+         3:       bf 08 00 00 00 00 00 00 r8 = r0
+         4:       bf 71 00 00 00 00 00 00 r1 = r7
+         5:       bf 62 00 00 00 00 00 00 r2 = r6
+         6:       85 10 00 00 02 00 00 00 call 2
+                  0000000000000030:  R_BPF_64_32  sec1
+         7:       0f 80 00 00 00 00 00 00 r0 += r8
+         8:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
+                  0000000000000040:  R_BPF_64_64  global
+        10:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+        11:       0f 10 00 00 00 00 00 00 r0 += r1
+        12:       95 00 00 00 00 00 00 00 exit
+
+  Disassembly of section sec1:
+
+  0000000000000000 <gfunc>:
+         0:       bf 20 00 00 00 00 00 00 r0 = r2
+         1:       2f 10 00 00 00 00 00 00 r0 *= r1
+         2:       95 00 00 00 00 00 00 00 exit
+
+  0000000000000018 <lfunc>:
+         3:       bf 20 00 00 00 00 00 00 r0 = r2
+         4:       0f 10 00 00 00 00 00 00 r0 += r1
+         5:       95 00 00 00 00 00 00 00 exit
+
+The first relocation corresponds to ``gfunc(a, b)``, where ``gfunc`` has a value of 0,
+so the ``call`` instruction offset is ``(0 + 0)/8 - 1 = -1``.
+The second relocation corresponds to ``lfunc(a, b)``, where ``lfunc`` has a section
+offset of ``0x18``, so the ``call`` instruction offset is ``(0 + 0x18)/8 - 1 = 2``.
+The third relocation corresponds to the ``ld_imm64`` of ``global``, which has a
+section offset of ``0``.
+
+The following example shows how ``R_BPF_64_ABS64`` could be generated::
+
+  int global() { return 0; }
+  struct t { void *g; } gbl = { global };
+
+Compiled with ``clang -target bpf -O2 -g -c test.c``, we see the
+following relocation in the ``.data`` section with
+``llvm-readelf -r test.o``::
+
+  Relocation section '.rel.data' at offset 0x458 contains 1 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000000  0000000700000002 R_BPF_64_ABS64         0000000000000000 global
+
+The relocation says the first 8 bytes of the ``.data`` section should be
+filled with the address of ``global``.
+
+From the ``llvm-readelf`` output, we can see that the DWARF sections have a
+number of ``R_BPF_64_ABS32`` and ``R_BPF_64_ABS64`` relocations::
+
+  Relocation section '.rel.debug_info' at offset 0x468 contains 13 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000006  0000000300000003 R_BPF_64_ABS32         0000000000000000 .debug_abbrev
+  000000000000000c  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  0000000000000012  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  0000000000000016  0000000600000003 R_BPF_64_ABS32         0000000000000000 .debug_line
+  000000000000001a  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  000000000000001e  0000000200000002 R_BPF_64_ABS64         0000000000000000 .text
+  000000000000002b  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  0000000000000037  0000000800000002 R_BPF_64_ABS64         0000000000000000 gbl
+  0000000000000040  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  ......
+
+The ``.BTF``/``.BTF.ext`` sections have ``R_BPF_64_NODYLD32`` relocations::
+
+  Relocation section '.rel.BTF' at offset 0x538 contains 1 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000084  0000000800000004 R_BPF_64_NODYLD32      0000000000000000 gbl
+
+  Relocation section '.rel.BTF.ext' at offset 0x548 contains 2 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  000000000000002c  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
+  0000000000000040  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
index a5c2500..b0436d3 100644 (file)
@@ -761,6 +761,31 @@ tcp_syncookies - INTEGER
        network connections you can set this knob to 2 to enable
        unconditionally generation of syncookies.
 
+tcp_migrate_req - BOOLEAN
+       The incoming connection is tied to a specific listening socket when
+       the initial SYN packet is received during the three-way handshake.
+       When a listener is closed, the request sockets still in the handshake
+       and the established sockets in the accept queue are aborted.
+
+       If the listener has SO_REUSEPORT enabled, other listeners on the
+       same port could have accepted such connections instead. This
+       option makes it possible to migrate these child sockets to another
+       listener after close() or shutdown().
+
+       Usually, an eBPF program of the BPF_SK_REUSEPORT_SELECT_OR_MIGRATE
+       type should be used to define the policy for picking an alive
+       listener. Otherwise, if this option is enabled, the kernel randomly
+       picks an alive listener.
+
+       Note that migration between listeners with different settings may
+       crash applications. Say migration happens from listener A to
+       B, and only B has TCP_SAVE_SYN enabled: B cannot read SYN data from
+       the requests migrated from A. To avoid such a situation, cancel
+       migration by returning SK_DROP from that type of eBPF program, or
+       disable this option.
+
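+       A minimal sketch of such a BPF_SK_REUSEPORT_SELECT_OR_MIGRATE
+       program (illustrative only; the map layout and names are
+       hypothetical)::
+
+         #include <linux/bpf.h>
+         #include <bpf/bpf_helpers.h>
+
+         struct {
+                 __uint(type, BPF_MAP_TYPE_SOCKMAP);
+                 __uint(max_entries, 1);
+                 __type(key, int);
+                 __type(value, __u64);
+         } listeners SEC(".maps");
+
+         SEC("sk_reuseport/migrate")
+         int migrate_policy(struct sk_reuseport_md *md)
+         {
+                 int zero = 0;
+
+                 /* New SYN: let the kernel select as usual. */
+                 if (!md->migrating_sk)
+                         return SK_PASS;
+
+                 /* Migration: steer to the stored listener, or return
+                  * SK_DROP to cancel the migration.
+                  */
+                 if (bpf_sk_select_reuseport(md, &listeners, &zero, 0))
+                         return SK_DROP;
+
+                 return SK_PASS;
+         }
+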
+       Default: 0
+
 tcp_fastopen - INTEGER
        Enable TCP Fast Open (RFC7413) to send and accept data in the opening
        SYN packet.
index 9dc44ba..f309fc1 100644 (file)
@@ -70,6 +70,8 @@ struct bpf_map_ops {
        void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
        int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
                                union bpf_attr __user *uattr);
+       int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
+                                         void *value, u64 flags);
        int (*map_lookup_and_delete_batch)(struct bpf_map *map,
                                           const union bpf_attr *attr,
                                           union bpf_attr __user *uattr);
@@ -1499,8 +1501,13 @@ int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
                    struct net_device *dev_rx);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
                    struct net_device *dev_rx);
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+                         struct bpf_map *map, bool exclude_ingress);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
                             struct bpf_prog *xdp_prog);
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+                          struct bpf_prog *xdp_prog, struct bpf_map *map,
+                          bool exclude_ingress);
 bool dev_map_can_have_prog(struct bpf_map *map);
 
 void __cpu_map_flush(void);
@@ -1668,6 +1675,13 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
        return 0;
 }
 
+static inline
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+                         struct bpf_map *map, bool exclude_ingress)
+{
+       return 0;
+}
+
 struct sk_buff;
 
 static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
@@ -1677,6 +1691,14 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
        return 0;
 }
 
+static inline
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+                          struct bpf_prog *xdp_prog, struct bpf_map *map,
+                          bool exclude_ingress)
+{
+       return 0;
+}
+
 static inline void __cpu_map_flush(void)
 {
 }
@@ -2026,6 +2048,7 @@ struct sk_reuseport_kern {
        struct sk_buff *skb;
        struct sock *sk;
        struct sock *selected_sk;
+       struct sock *migrating_sk;
        void *data_end;
        u32 hash;
        u32 reuseport_id;
index b902c58..24496bc 100644 (file)
@@ -58,7 +58,7 @@ struct bpf_local_storage_data {
         * from the object's bpf_local_storage.
         *
         * Put it in the same cacheline as the data to minimize
-        * the number of cachelines access during the cache hit case.
+        * the number of cachelines accessed during the cache hit case.
         */
        struct bpf_local_storage_map __rcu *smap;
        u8 data[] __aligned(8);
@@ -71,7 +71,7 @@ struct bpf_local_storage_elem {
        struct bpf_local_storage __rcu *local_storage;
        struct rcu_head rcu;
        /* 8 bytes hole */
-       /* The data is stored in aother cacheline to minimize
+       /* The data is stored in another cacheline to minimize
         * the number of cachelines access during a cache hit.
         */
        struct bpf_local_storage_data sdata ____cacheline_aligned;
index 9a09547..688856e 100644 (file)
@@ -646,6 +646,7 @@ struct bpf_redirect_info {
        u32 flags;
        u32 tgt_index;
        void *tgt_value;
+       struct bpf_map *map;
        u32 map_id;
        enum bpf_map_type map_type;
        u32 kern_flags;
@@ -995,11 +996,13 @@ void bpf_warn_invalid_xdp_action(u32 act);
 #ifdef CONFIG_INET
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
                                  struct bpf_prog *prog, struct sk_buff *skb,
+                                 struct sock *migrating_sk,
                                  u32 hash);
 #else
 static inline struct sock *
 bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
                     struct bpf_prog *prog, struct sk_buff *skb,
+                    struct sock *migrating_sk,
                     u32 hash)
 {
        return NULL;
@@ -1464,17 +1467,19 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 }
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
-static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
+                                                 u64 flags, const u64 flag_mask,
                                                  void *lookup_elem(struct bpf_map *map, u32 key))
 {
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+       const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX;
 
        /* Lower bits of the flags are used as return code on lookup failure */
-       if (unlikely(flags > XDP_TX))
+       if (unlikely(flags & ~(action_mask | flag_mask)))
                return XDP_ABORTED;
 
        ri->tgt_value = lookup_elem(map, ifindex);
-       if (unlikely(!ri->tgt_value)) {
+       if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
                /* If the lookup fails we want to clear out the state in the
                 * redirect_info struct completely, so that if an eBPF program
                 * performs multiple lookups, the last one always takes
@@ -1482,13 +1487,21 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
                 */
                ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
                ri->map_type = BPF_MAP_TYPE_UNSPEC;
-               return flags;
+               return flags & action_mask;
        }
 
        ri->tgt_index = ifindex;
        ri->map_id = map->id;
        ri->map_type = map->map_type;
 
+       if (flags & BPF_F_BROADCAST) {
+               WRITE_ONCE(ri->map, map);
+               ri->flags = flags;
+       } else {
+               WRITE_ONCE(ri->map, NULL);
+               ri->flags = 0;
+       }
+
        return XDP_REDIRECT;
 }
 
index 746c80c..b862051 100644 (file)
@@ -126,6 +126,7 @@ struct netns_ipv4 {
        u8 sysctl_tcp_syn_retries;
        u8 sysctl_tcp_synack_retries;
        u8 sysctl_tcp_syncookies;
+       u8 sysctl_tcp_migrate_req;
        int sysctl_tcp_reordering;
        u8 sysctl_tcp_retries1;
        u8 sysctl_tcp_retries2;
index 505f1e1..473b0b0 100644 (file)
@@ -13,8 +13,9 @@ extern spinlock_t reuseport_lock;
 struct sock_reuseport {
        struct rcu_head         rcu;
 
-       u16                     max_socks;      /* length of socks */
-       u16                     num_socks;      /* elements in socks */
+       u16                     max_socks;              /* length of socks */
+       u16                     num_socks;              /* elements in socks */
+       u16                     num_closed_socks;       /* closed elements in socks */
        /* The last synq overflow event timestamp of this
         * reuse->socks[] group.
         */
@@ -31,10 +32,14 @@ extern int reuseport_alloc(struct sock *sk, bool bind_inany);
 extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
                              bool bind_inany);
 extern void reuseport_detach_sock(struct sock *sk);
+void reuseport_stop_listen_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
                                          u32 hash,
                                          struct sk_buff *skb,
                                          int hdr_len);
+struct sock *reuseport_migrate_sock(struct sock *sk,
+                                   struct sock *migrating_sk,
+                                   struct sk_buff *skb);
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);
 
index a5bc214..5533f0a 100644 (file)
@@ -170,6 +170,7 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
                                         struct net_device *dev);
 int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);
 
 static inline
 void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
index fcad364..c40fc97 100644 (file)
@@ -110,7 +110,11 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
                u32 ifindex = 0, map_index = index;
 
                if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
-                       ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
+                       /* Just leave to_ifindex at 0 when doing a broadcast
+                        * redirect, as tgt will be NULL.
+                        */
+                       if (tgt)
+                               ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
                } else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
                        ifindex = index;
                        map_index = 0;
index 418b9b8..bf9252c 100644 (file)
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
  *             Look up an element with the given *key* in the map referred to
  *             by the file descriptor *fd*, and if found, delete the element.
  *
+ *             For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ *             types, the *flags* argument needs to be set to 0, but for other
+ *             map types, it may be specified as:
+ *
+ *             **BPF_F_LOCK**
+ *                     Look up and delete the value of a spin-locked map
+ *                     without returning the lock. This must be specified if
+ *                     the elements contain a spinlock.
+ *
  *             The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
  *             implement this command as a "pop" operation, deleting the top
  *             element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
  *             This command is only valid for the following map types:
  *             * **BPF_MAP_TYPE_QUEUE**
  *             * **BPF_MAP_TYPE_STACK**
+ *             * **BPF_MAP_TYPE_HASH**
+ *             * **BPF_MAP_TYPE_PERCPU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
  *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
@@ -981,6 +994,8 @@ enum bpf_attach_type {
        BPF_SK_LOOKUP,
        BPF_XDP,
        BPF_SK_SKB_VERDICT,
+       BPF_SK_REUSEPORT_SELECT,
+       BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -2542,8 +2557,12 @@ union bpf_attr {
  *             The lower two bits of *flags* are used as the return code if
  *             the map lookup fails. This is so that the return value can be
  *             one of the XDP program return codes up to **XDP_TX**, as chosen
- *             by the caller. Any higher bits in the *flags* argument must be
- *             unset.
+ *             by the caller. The higher bits of *flags* can be set to
+ *             BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below.
+ *
+ *             With BPF_F_BROADCAST the packet will be broadcast to all the
+ *             interfaces in the map, and with BPF_F_EXCLUDE_INGRESS the
+ *             ingress interface will be excluded when broadcasting.
  *
  *             See also **bpf_redirect**\ (), which only supports redirecting
  *             to an ifindex, but doesn't require a map to do so.
@@ -5109,6 +5128,12 @@ enum {
        BPF_F_BPRM_SECUREEXEC   = (1ULL << 0),
 };
 
+/* Flags for bpf_redirect_map helper */
+enum {
+       BPF_F_BROADCAST         = (1ULL << 3),
+       BPF_F_EXCLUDE_INGRESS   = (1ULL << 4),
+};
+
 #define __bpf_md_ptr(type, name)       \
 union {                                        \
        type name;                      \
@@ -5393,6 +5418,20 @@ struct sk_reuseport_md {
        __u32 ip_protocol;      /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
        __u32 bind_inany;       /* Is sock bound to an INANY address? */
        __u32 hash;             /* A hash of the packet 4 tuples */
+       /* When reuse->migrating_sk is NULL, it is selecting a sk for the
+        * new incoming connection request (e.g. selecting a listen sk for
+        * the received SYN in the TCP case).  reuse->sk is one of the sk
+        * in the reuseport group. The bpf prog can use reuse->sk to learn
+        * the local listening ip/port without looking into the skb.
+        *
+        * When reuse->migrating_sk is not NULL, reuse->sk is closed and
+        * reuse->migrating_sk is the socket that needs to be migrated
+        * to another listening socket.  migrating_sk could be a fullsock
+        * sk that is fully established or a reqsk that is in-the-middle
+        * of 3-way handshake.
+        */
+       __bpf_md_ptr(struct bpf_sock *, sk);
+       __bpf_md_ptr(struct bpf_sock *, migrating_sk);
 };
 
 #define BPF_TAG_SIZE   8
index 2921ca3..96ceed0 100644 (file)
@@ -72,7 +72,7 @@ void bpf_inode_storage_free(struct inode *inode)
                return;
        }
 
-       /* Netiher the bpf_prog nor the bpf-map's syscall
+       /* Neither the bpf_prog nor the bpf-map's syscall
         * could be modifying the local_storage->list now.
         * Thus, no elem can be added-to or deleted-from the
         * local_storage->list by the bpf_prog or by the bpf-map's syscall.
index da471bf..0606237 100644 (file)
@@ -127,7 +127,7 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 }
 
 /* The set of hooks which are called without pagefaults disabled and are allowed
- * to "sleep" and thus can be used for sleeable BPF programs.
+ * to "sleep" and thus can be used for sleepable BPF programs.
  */
 BTF_SET_START(sleepable_lsm_hooks)
 BTF_ID(func, bpf_lsm_bpf)
index 3925592..cb4b729 100644 (file)
@@ -51,7 +51,7 @@
  * The BTF type section contains a list of 'struct btf_type' objects.
  * Each one describes a C type.  Recall from the above section
  * that a 'struct btf_type' object could be immediately followed by extra
- * data in order to desribe some particular C types.
+ * data in order to describe some particular C types.
  *
  * type_id:
  * ~~~~~~~
@@ -1143,7 +1143,7 @@ static void *btf_show_obj_safe(struct btf_show *show,
 
        /*
         * We need a new copy to our safe object, either because we haven't
-        * yet copied and are intializing safe data, or because the data
+        * yet copied and are initializing safe data, or because the data
         * we want falls outside the boundaries of the safe object.
         */
        if (!safe) {
@@ -3417,7 +3417,7 @@ static struct btf_kind_operations func_proto_ops = {
         * BTF_KIND_FUNC_PROTO cannot be directly referred by
         * a struct's member.
         *
-        * It should be a funciton pointer instead.
+        * It should be a function pointer instead.
         * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO)
         *
         * Hence, there is no btf_func_check_member().
index 5e31ee9..034ad93 100644 (file)
@@ -1392,29 +1392,54 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 select_insn:
        goto *jumptable[insn->code];
 
-       /* ALU */
-#define ALU(OPCODE, OP)                        \
-       ALU64_##OPCODE##_X:             \
-               DST = DST OP SRC;       \
-               CONT;                   \
-       ALU_##OPCODE##_X:               \
-               DST = (u32) DST OP (u32) SRC;   \
-               CONT;                   \
-       ALU64_##OPCODE##_K:             \
-               DST = DST OP IMM;               \
-               CONT;                   \
-       ALU_##OPCODE##_K:               \
-               DST = (u32) DST OP (u32) IMM;   \
+       /* Explicitly mask the register-based shift amounts with 63 or 31
+        * to avoid undefined behavior. Normally this won't affect the
+        * generated code, for example, in case of native 64 bit archs such
+        * as x86-64 or arm64, the compiler is optimizing the AND away for
+        * the interpreter. In case of JITs, each of the JIT backends compiles
+        * the BPF shift operations to machine instructions which produce
+        * implementation-defined results in such a case; the resulting
+        * contents of the register may be arbitrary, but program behaviour
+        * as a whole remains defined. In other words, in case of JIT backends,
+        * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
+        */
+       /* ALU (shifts) */
+#define SHT(OPCODE, OP)                                        \
+       ALU64_##OPCODE##_X:                             \
+               DST = DST OP (SRC & 63);                \
+               CONT;                                   \
+       ALU_##OPCODE##_X:                               \
+               DST = (u32) DST OP ((u32) SRC & 31);    \
+               CONT;                                   \
+       ALU64_##OPCODE##_K:                             \
+               DST = DST OP IMM;                       \
+               CONT;                                   \
+       ALU_##OPCODE##_K:                               \
+               DST = (u32) DST OP (u32) IMM;           \
+               CONT;
+       /* ALU (rest) */
+#define ALU(OPCODE, OP)                                        \
+       ALU64_##OPCODE##_X:                             \
+               DST = DST OP SRC;                       \
+               CONT;                                   \
+       ALU_##OPCODE##_X:                               \
+               DST = (u32) DST OP (u32) SRC;           \
+               CONT;                                   \
+       ALU64_##OPCODE##_K:                             \
+               DST = DST OP IMM;                       \
+               CONT;                                   \
+       ALU_##OPCODE##_K:                               \
+               DST = (u32) DST OP (u32) IMM;           \
                CONT;
-
        ALU(ADD,  +)
        ALU(SUB,  -)
        ALU(AND,  &)
        ALU(OR,   |)
-       ALU(LSH, <<)
-       ALU(RSH, >>)
        ALU(XOR,  ^)
        ALU(MUL,  *)
+       SHT(LSH, <<)
+       SHT(RSH, >>)
+#undef SHT
 #undef ALU
        ALU_NEG:
                DST = (u32) -DST;
@@ -1439,13 +1464,13 @@ select_insn:
                insn++;
                CONT;
        ALU_ARSH_X:
-               DST = (u64) (u32) (((s32) DST) >> SRC);
+               DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
                CONT;
        ALU_ARSH_K:
                DST = (u64) (u32) (((s32) DST) >> IMM);
                CONT;
        ALU64_ARSH_X:
-               (*(s64 *) &DST) >>= SRC;
+               (*(s64 *) &DST) >>= (SRC & 63);
                CONT;
        ALU64_ARSH_K:
                (*(s64 *) &DST) >>= IMM;
index 5dd3e86..a1a0c4e 100644 (file)
@@ -601,7 +601,8 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 
 static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-       return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
+       return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+                                     __cpu_map_lookup_elem);
 }
 
 static int cpu_map_btf_id;
index aa51647..2a75e6c 100644 (file)
@@ -57,6 +57,7 @@ struct xdp_dev_bulk_queue {
        struct list_head flush_node;
        struct net_device *dev;
        struct net_device *dev_rx;
+       struct bpf_prog *xdp_prog;
        unsigned int count;
 };
 
@@ -197,6 +198,7 @@ static void dev_map_free(struct bpf_map *map)
        list_del_rcu(&dtab->list);
        spin_unlock(&dev_map_lock);
 
+       bpf_clear_redirect_map(map);
        synchronize_rcu();
 
        /* Make sure prior __dev_map_entry_free() have completed. */
@@ -326,22 +328,69 @@ bool dev_map_can_have_prog(struct bpf_map *map)
        return false;
 }
 
+static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
+                               struct xdp_frame **frames, int n,
+                               struct net_device *dev)
+{
+       struct xdp_txq_info txq = { .dev = dev };
+       struct xdp_buff xdp;
+       int i, nframes = 0;
+
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdpf = frames[i];
+               u32 act;
+               int err;
+
+               xdp_convert_frame_to_buff(xdpf, &xdp);
+               xdp.txq = &txq;
+
+               act = bpf_prog_run_xdp(xdp_prog, &xdp);
+               switch (act) {
+               case XDP_PASS:
+                       err = xdp_update_frame_from_buff(&xdp, xdpf);
+                       if (unlikely(err < 0))
+                               xdp_return_frame_rx_napi(xdpf);
+                       else
+                               frames[nframes++] = xdpf;
+                       break;
+               default:
+                       bpf_warn_invalid_xdp_action(act);
+                       fallthrough;
+               case XDP_ABORTED:
+                       trace_xdp_exception(dev, xdp_prog, act);
+                       fallthrough;
+               case XDP_DROP:
+                       xdp_return_frame_rx_napi(xdpf);
+                       break;
+               }
+       }
+       return nframes; /* sent frames count */
+}
+
 static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
        struct net_device *dev = bq->dev;
+       unsigned int cnt = bq->count;
        int sent = 0, err = 0;
+       int to_send = cnt;
        int i;
 
-       if (unlikely(!bq->count))
+       if (unlikely(!cnt))
                return;
 
-       for (i = 0; i < bq->count; i++) {
+       for (i = 0; i < cnt; i++) {
                struct xdp_frame *xdpf = bq->q[i];
 
                prefetch(xdpf);
        }
 
-       sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
+       if (bq->xdp_prog) {
+               to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
+               if (!to_send)
+                       goto out;
+       }
+
+       sent = dev->netdev_ops->ndo_xdp_xmit(dev, to_send, bq->q, flags);
        if (sent < 0) {
                /* If ndo_xdp_xmit fails with an errno, no frames have
                 * been xmit'ed.
@@ -353,13 +402,12 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
        /* If not all frames have been transmitted, it is our
         * responsibility to free them
         */
-       for (i = sent; unlikely(i < bq->count); i++)
+       for (i = sent; unlikely(i < to_send); i++)
                xdp_return_frame_rx_napi(bq->q[i]);
 
-       trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, bq->count - sent, err);
-       bq->dev_rx = NULL;
+out:
        bq->count = 0;
-       __list_del_clearprev(&bq->flush_node);
+       trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, cnt - sent, err);
 }
 
 /* __dev_flush is called from xdp_do_flush() which _must_ be signaled
@@ -377,13 +425,17 @@ void __dev_flush(void)
        struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
        struct xdp_dev_bulk_queue *bq, *tmp;
 
-       list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
+       list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
                bq_xmit_all(bq, XDP_XMIT_FLUSH);
+               bq->dev_rx = NULL;
+               bq->xdp_prog = NULL;
+               __list_del_clearprev(&bq->flush_node);
+       }
 }
 
 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
- * update happens in parallel here a dev_put wont happen until after reading the
- * ifindex.
+ * update happens in parallel here a dev_put won't happen until after reading
+ * the ifindex.
  */
 static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
@@ -401,7 +453,7 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
  * Thus, safe percpu variable access.
  */
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
-                      struct net_device *dev_rx)
+                      struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
        struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
        struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
@@ -412,18 +464,22 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
        /* Ingress dev_rx will be the same for all xdp_frame's in
         * bulk_queue, because bq stored per-CPU and must be flushed
         * from net_device drivers NAPI func end.
+        *
+        * Do the same with xdp_prog and flush_list since these fields
+        * are only ever modified together.
         */
-       if (!bq->dev_rx)
+       if (!bq->dev_rx) {
                bq->dev_rx = dev_rx;
+               bq->xdp_prog = xdp_prog;
+               list_add(&bq->flush_node, flush_list);
+       }
 
        bq->q[bq->count++] = xdpf;
-
-       if (!bq->flush_node.prev)
-               list_add(&bq->flush_node, flush_list);
 }
 
 static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
-                              struct net_device *dev_rx)
+                               struct net_device *dev_rx,
+                               struct bpf_prog *xdp_prog)
 {
        struct xdp_frame *xdpf;
        int err;
@@ -439,55 +495,115 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
        if (unlikely(!xdpf))
                return -EOVERFLOW;
 
-       bq_enqueue(dev, xdpf, dev_rx);
+       bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
        return 0;
 }
 
-static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
-                                        struct xdp_buff *xdp,
-                                        struct bpf_prog *xdp_prog)
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+                   struct net_device *dev_rx)
 {
-       struct xdp_txq_info txq = { .dev = dev };
-       u32 act;
+       return __xdp_enqueue(dev, xdp, dev_rx, NULL);
+}
 
-       xdp_set_data_meta_invalid(xdp);
-       xdp->txq = &txq;
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+                   struct net_device *dev_rx)
+{
+       struct net_device *dev = dst->dev;
 
-       act = bpf_prog_run_xdp(xdp_prog, xdp);
-       switch (act) {
-       case XDP_PASS:
-               return xdp;
-       case XDP_DROP:
-               break;
-       default:
-               bpf_warn_invalid_xdp_action(act);
-               fallthrough;
-       case XDP_ABORTED:
-               trace_xdp_exception(dev, xdp_prog, act);
-               break;
-       }
+       return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
+}
 
-       xdp_return_buff(xdp);
-       return NULL;
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
+                        int exclude_ifindex)
+{
+       if (!obj || obj->dev->ifindex == exclude_ifindex ||
+           !obj->dev->netdev_ops->ndo_xdp_xmit)
+               return false;
+
+       if (xdp_ok_fwd_dev(obj->dev, xdp->data_end - xdp->data))
+               return false;
+
+       return true;
 }
 
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
-                   struct net_device *dev_rx)
+static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
+                                struct net_device *dev_rx,
+                                struct xdp_frame *xdpf)
 {
-       return __xdp_enqueue(dev, xdp, dev_rx);
+       struct xdp_frame *nxdpf;
+
+       nxdpf = xdpf_clone(xdpf);
+       if (!nxdpf)
+               return -ENOMEM;
+
+       bq_enqueue(obj->dev, nxdpf, dev_rx, obj->xdp_prog);
+
+       return 0;
 }
 
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
-                   struct net_device *dev_rx)
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+                         struct bpf_map *map, bool exclude_ingress)
 {
-       struct net_device *dev = dst->dev;
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
+       struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       struct hlist_head *head;
+       struct xdp_frame *xdpf;
+       unsigned int i;
+       int err;
 
-       if (dst->xdp_prog) {
-               xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
-               if (!xdp)
-                       return 0;
+       xdpf = xdp_convert_buff_to_frame(xdp);
+       if (unlikely(!xdpf))
+               return -EOVERFLOW;
+
+       if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+               for (i = 0; i < map->max_entries; i++) {
+                       dst = READ_ONCE(dtab->netdev_map[i]);
+                       if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                               continue;
+
+                       /* we only need n-1 clones; last_dst enqueued below */
+                       if (!last_dst) {
+                               last_dst = dst;
+                               continue;
+                       }
+
+                       err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
+                       if (err)
+                               return err;
+
+                       last_dst = dst;
+               }
+       } else { /* BPF_MAP_TYPE_DEVMAP_HASH */
+               for (i = 0; i < dtab->n_buckets; i++) {
+                       head = dev_map_index_hash(dtab, i);
+                       hlist_for_each_entry_rcu(dst, head, index_hlist,
+                                                lockdep_is_held(&dtab->index_lock)) {
+                               if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                                       continue;
+
+                               /* we only need n-1 clones; last_dst enqueued below */
+                               if (!last_dst) {
+                                       last_dst = dst;
+                                       continue;
+                               }
+
+                               err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
+                               if (err)
+                                       return err;
+
+                               last_dst = dst;
+                       }
+               }
        }
-       return __xdp_enqueue(dev, xdp, dev_rx);
+
+       /* consume the last copy of the frame */
+       if (last_dst)
+               bq_enqueue(last_dst->dev, xdpf, dev_rx, last_dst->xdp_prog);
+       else
+               xdp_return_frame_rx_napi(xdpf); /* dtab is empty */
+
+       return 0;
 }
 
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
@@ -504,6 +620,87 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
        return 0;
 }
 
+static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst,
+                                 struct sk_buff *skb,
+                                 struct bpf_prog *xdp_prog)
+{
+       struct sk_buff *nskb;
+       int err;
+
+       nskb = skb_clone(skb, GFP_ATOMIC);
+       if (!nskb)
+               return -ENOMEM;
+
+       err = dev_map_generic_redirect(dst, nskb, xdp_prog);
+       if (unlikely(err)) {
+               consume_skb(nskb);
+               return err;
+       }
+
+       return 0;
+}
+
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+                          struct bpf_prog *xdp_prog, struct bpf_map *map,
+                          bool exclude_ingress)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
+       struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       struct hlist_head *head;
+       struct hlist_node *next;
+       unsigned int i;
+       int err;
+
+       if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+               for (i = 0; i < map->max_entries; i++) {
+                       dst = READ_ONCE(dtab->netdev_map[i]);
+                       if (!dst || dst->dev->ifindex == exclude_ifindex)
+                               continue;
+
+                       /* we only need n-1 clones; last_dst enqueued below */
+                       if (!last_dst) {
+                               last_dst = dst;
+                               continue;
+                       }
+
+                       err = dev_map_redirect_clone(last_dst, skb, xdp_prog);
+                       if (err)
+                               return err;
+
+                       last_dst = dst;
+               }
+       } else { /* BPF_MAP_TYPE_DEVMAP_HASH */
+               for (i = 0; i < dtab->n_buckets; i++) {
+                       head = dev_map_index_hash(dtab, i);
+                       hlist_for_each_entry_safe(dst, next, head, index_hlist) {
+                               if (!dst || dst->dev->ifindex == exclude_ifindex)
+                                       continue;
+
+                               /* we only need n-1 clones; last_dst enqueued below */
+                               if (!last_dst) {
+                                       last_dst = dst;
+                                       continue;
+                               }
+
+                               err = dev_map_redirect_clone(last_dst, skb, xdp_prog);
+                               if (err)
+                                       return err;
+
+                               last_dst = dst;
+                       }
+               }
+       }
+
+       /* consume the first skb and return */
+       if (last_dst)
+               return dev_map_generic_redirect(last_dst, skb, xdp_prog);
+
+       /* dtab is empty */
+       consume_skb(skb);
+       return 0;
+}
+
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
        struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
@@ -730,12 +927,16 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
 
 static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-       return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
+       return __bpf_xdp_redirect_map(map, ifindex, flags,
+                                     BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
+                                     __dev_map_lookup_elem);
 }
 
 static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-       return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
+       return __bpf_xdp_redirect_map(map, ifindex, flags,
+                                     BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
+                                     __dev_map_hash_lookup_elem);
 }
 
 static int dev_map_btf_id;
index d7ebb12..6f6681b 100644 (file)
  * events, kprobes and tracing to be invoked before the prior invocation
  * from one of these contexts completed. sys_bpf() uses the same mechanism
  * by pinning the task to the current CPU and incrementing the recursion
- * protection accross the map operation.
+ * protection across the map operation.
  *
  * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
  * operations like memory allocations (even with GFP_ATOMIC) from atomic
  * contexts. This is required because even with GFP_ATOMIC the memory
- * allocator calls into code pathes which acquire locks with long held lock
+ * allocator calls into code paths which acquire locks with long held lock
  * sections. To ensure the deterministic behaviour these locks are regular
  * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
  * true atomic contexts on an RT kernel are the low level hardware
@@ -1401,6 +1401,100 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
        rcu_read_unlock();
 }
 
+static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+                                            void *value, bool is_lru_map,
+                                            bool is_percpu, u64 flags)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct hlist_nulls_head *head;
+       unsigned long bflags;
+       struct htab_elem *l;
+       u32 hash, key_size;
+       struct bucket *b;
+       int ret;
+
+       key_size = map->key_size;
+
+       hash = htab_map_hash(key, key_size, htab->hashrnd);
+       b = __select_bucket(htab, hash);
+       head = &b->head;
+
+       ret = htab_lock_bucket(htab, b, hash, &bflags);
+       if (ret)
+               return ret;
+
+       l = lookup_elem_raw(head, hash, key, key_size);
+       if (!l) {
+               ret = -ENOENT;
+       } else {
+               if (is_percpu) {
+                       u32 roundup_value_size = round_up(map->value_size, 8);
+                       void __percpu *pptr;
+                       int off = 0, cpu;
+
+                       pptr = htab_elem_get_ptr(l, key_size);
+                       for_each_possible_cpu(cpu) {
+                               bpf_long_memcpy(value + off,
+                                               per_cpu_ptr(pptr, cpu),
+                                               roundup_value_size);
+                               off += roundup_value_size;
+                       }
+               } else {
+                       u32 roundup_key_size = round_up(map->key_size, 8);
+
+                       if (flags & BPF_F_LOCK)
+                               copy_map_value_locked(map, value, l->key +
+                                                     roundup_key_size,
+                                                     true);
+                       else
+                               copy_map_value(map, value, l->key +
+                                              roundup_key_size);
+                       check_and_init_map_lock(map, value);
+               }
+
+               hlist_nulls_del_rcu(&l->hash_node);
+               if (!is_lru_map)
+                       free_htab_elem(htab, l);
+       }
+
+       htab_unlock_bucket(htab, b, hash, bflags);
+
+       if (is_lru_map && l)
+               bpf_lru_push_free(&htab->lru, &l->lru_node);
+
+       return ret;
+}
+
+static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+                                          void *value, u64 flags)
+{
+       return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
+                                                flags);
+}
+
+static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+                                                 void *key, void *value,
+                                                 u64 flags)
+{
+       return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
+                                                flags);
+}
+
+static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+                                              void *value, u64 flags)
+{
+       return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
+                                                flags);
+}
+
+static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+                                                     void *key, void *value,
+                                                     u64 flags)
+{
+       return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
+                                                flags);
+}
+
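User space reaches these wrappers through the extended BPF_MAP_LOOKUP_AND_DELETE_ELEM command. A hedged sketch of the intended usage for a plain BPF_MAP_TYPE_HASH with __u64 values, assuming the bpf_map_lookup_and_delete_elem_flags() wrapper this series adds to libbpf:

    #include <bpf/bpf.h>
    #include <bpf/libbpf.h>

    /* Atomically read and remove one element. For the PERCPU variants the
     * value buffer must instead hold round_up(value_size, 8) *
     * libbpf_num_possible_cpus() bytes, matching the per-CPU copy loop in
     * __htab_map_lookup_and_delete_elem() above.
     */
    static int pop_one(int map_fd, __u32 key, __u64 *value)
    {
            /* Pass BPF_F_LOCK instead of 0 only when the map value embeds
             * a struct bpf_spin_lock; the syscall rejects it otherwise.
             */
            return bpf_map_lookup_and_delete_elem_flags(map_fd, &key, value, 0);
    }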
 static int
 __htab_map_lookup_and_delete_batch(struct bpf_map *map,
                                   const union bpf_attr *attr,
@@ -1934,6 +2028,7 @@ const struct bpf_map_ops htab_map_ops = {
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_map_lookup_elem,
+       .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
        .map_update_elem = htab_map_update_elem,
        .map_delete_elem = htab_map_delete_elem,
        .map_gen_lookup = htab_map_gen_lookup,
@@ -1954,6 +2049,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_lru_map_lookup_elem,
+       .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
        .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
        .map_update_elem = htab_lru_map_update_elem,
        .map_delete_elem = htab_lru_map_delete_elem,
@@ -2077,6 +2173,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_percpu_map_lookup_elem,
+       .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
        .map_update_elem = htab_percpu_map_update_elem,
        .map_delete_elem = htab_map_delete_elem,
        .map_seq_show_elem = htab_percpu_map_seq_show_elem,
@@ -2096,6 +2193,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_lru_percpu_map_lookup_elem,
+       .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
        .map_update_elem = htab_lru_percpu_map_update_elem,
        .map_delete_elem = htab_lru_map_delete_elem,
        .map_seq_show_elem = htab_percpu_map_seq_show_elem,
index 52aa7b3..03af863 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
 #pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)
index 4838922..93a5539 100644 (file)
@@ -102,7 +102,7 @@ static void reuseport_array_free(struct bpf_map *map)
        /*
         * ops->map_*_elem() will not be able to access this
         * array now. Hence, this function only races with
-        * bpf_sk_reuseport_detach() which was triggerred by
+        * bpf_sk_reuseport_detach() which was triggered by
         * close() or disconnect().
         *
         * This function and bpf_sk_reuseport_detach() are
index 73d15bc..e343f15 100644 (file)
@@ -1484,7 +1484,7 @@ free_buf:
        return err;
 }
 
-#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags
 
 static int map_lookup_and_delete_elem(union bpf_attr *attr)
 {
@@ -1500,6 +1500,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
        if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
                return -EINVAL;
 
+       if (attr->flags & ~BPF_F_LOCK)
+               return -EINVAL;
+
        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
@@ -1510,24 +1513,47 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
                goto err_put;
        }
 
+       if (attr->flags &&
+           (map->map_type == BPF_MAP_TYPE_QUEUE ||
+            map->map_type == BPF_MAP_TYPE_STACK)) {
+               err = -EINVAL;
+               goto err_put;
+       }
+
+       if ((attr->flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map)) {
+               err = -EINVAL;
+               goto err_put;
+       }
+
        key = __bpf_copy_key(ukey, map->key_size);
        if (IS_ERR(key)) {
                err = PTR_ERR(key);
                goto err_put;
        }
 
-       value_size = map->value_size;
+       value_size = bpf_map_value_size(map);
 
        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;
 
+       err = -ENOTSUPP;
        if (map->map_type == BPF_MAP_TYPE_QUEUE ||
            map->map_type == BPF_MAP_TYPE_STACK) {
                err = map->ops->map_pop_elem(map, value);
-       } else {
-               err = -ENOTSUPP;
+       } else if (map->map_type == BPF_MAP_TYPE_HASH ||
+                  map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+                  map->map_type == BPF_MAP_TYPE_LRU_HASH ||
+                  map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+               if (!bpf_map_is_dev_bound(map)) {
+                       bpf_disable_instrumentation();
+                       rcu_read_lock();
+                       err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
+                       rcu_read_unlock();
+                       bpf_enable_instrumentation();
+               }
        }
 
        if (err)
@@ -1947,6 +1973,11 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
                        attr->expected_attach_type =
                                BPF_CGROUP_INET_SOCK_CREATE;
                break;
+       case BPF_PROG_TYPE_SK_REUSEPORT:
+               if (!attr->expected_attach_type)
+                       attr->expected_attach_type =
+                               BPF_SK_REUSEPORT_SELECT;
+               break;
        }
 }
 
@@ -2030,6 +2061,14 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
                if (expected_attach_type == BPF_SK_LOOKUP)
                        return 0;
                return -EINVAL;
+       case BPF_PROG_TYPE_SK_REUSEPORT:
+               switch (expected_attach_type) {
+               case BPF_SK_REUSEPORT_SELECT:
+               case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE:
+                       return 0;
+               default:
+                       return -EINVAL;
+               }
        case BPF_PROG_TYPE_SYSCALL:
        case BPF_PROG_TYPE_EXT:
                if (expected_attach_type)
index ceac528..3d7127f 100644 (file)
@@ -111,28 +111,31 @@ struct tnum tnum_xor(struct tnum a, struct tnum b)
        return TNUM(v & ~mu, mu);
 }
 
-/* half-multiply add: acc += (unknown * mask * value).
- * An intermediate step in the multiply algorithm.
+/* Generate partial products by multiplying each bit in the multiplier (tnum a)
+ * with the multiplicand (tnum b), and add the partial products after
+ * appropriately bit-shifting them. Instead of directly performing tnum addition
+ * on the generated partial products, equivalently, decompose each partial
+ * product into two tnums, consisting of the value-sum (acc_v) and the
+ * mask-sum (acc_m) and then perform tnum addition on them. The following paper
+ * explains the algorithm in more detail: https://arxiv.org/abs/2105.05398.
  */
-static struct tnum hma(struct tnum acc, u64 value, u64 mask)
-{
-       while (mask) {
-               if (mask & 1)
-                       acc = tnum_add(acc, TNUM(0, value));
-               mask >>= 1;
-               value <<= 1;
-       }
-       return acc;
-}
-
 struct tnum tnum_mul(struct tnum a, struct tnum b)
 {
-       struct tnum acc;
-       u64 pi;
-
-       pi = a.value * b.value;
-       acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value);
-       return hma(acc, b.mask, a.value);
+       u64 acc_v = a.value * b.value;
+       struct tnum acc_m = TNUM(0, 0);
+
+       while (a.value || a.mask) {
+               /* LSB of tnum a is a certain 1 */
+               if (a.value & 1)
+                       acc_m = tnum_add(acc_m, TNUM(0, b.mask));
+               /* LSB of tnum a is uncertain */
+               else if (a.mask & 1)
+                       acc_m = tnum_add(acc_m, TNUM(0, b.value | b.mask));
+               /* Note: no case for LSB is certain 0 */
+               a = tnum_rshift(a, 1);
+               b = tnum_lshift(b, 1);
+       }
+       return tnum_add(TNUM(acc_v, 0), acc_m);
 }
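A short worked trace of the new algorithm (editorial illustration, not part of the patch): let a = TNUM(2, 1), i.e. the set {2, 3}, and b = TNUM(4, 0), i.e. {4}.

    acc_v = a.value * b.value = 8,  acc_m = TNUM(0, 0)
    iter 1: LSB of a uncertain -> acc_m += TNUM(0, b.value | b.mask) = TNUM(0, 4)
            a = TNUM(1, 0), b = TNUM(8, 0)
    iter 2: LSB of a certain 1 -> acc_m += TNUM(0, b.mask) = TNUM(0, 4)
            a = TNUM(0, 0), b = TNUM(16, 0)
    result: tnum_add(TNUM(8, 0), TNUM(0, 4)) = TNUM(8, 4), i.e. {8, 12}

which is exactly {2, 3} * {4}; per the paper referenced in the comment, the decomposition is provably sound and never less precise than the hma()-based multiply it replaces.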
 
 /* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
index 2d44b5a..28a3630 100644 (file)
@@ -552,7 +552,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
  * __bpf_prog_enter returns:
  * 0 - skip execution of the bpf prog
  * 1 - execute bpf prog
- * [2..MAX_U64] - excute bpf prog and record execution time.
+ * [2..MAX_U64] - execute bpf prog and record execution time.
  *     This is start time.
  */
 u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
index 331b170..b7d51fc 100644 (file)
@@ -47,7 +47,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  * - unreachable insns exist (shouldn't be a forest. program = one function)
  * - out of bounds or malformed jumps
  * The second pass is all possible path descent from the 1st insn.
- * Since it's analyzing all pathes through the program, the length of the
+ * Since it's analyzing all paths through the program, the length of the
  * analysis is limited to 64k insn, which may be hit even if total number of
  * insn is less then 4K, but there are too many branches that change stack/regs.
  * Number of 'branches to be analyzed' is limited to 1k
@@ -132,7 +132,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  * If it's ok, then verifier allows this BPF_CALL insn and looks at
  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
- * returns ether pointer to map value or NULL.
+ * returns either pointer to map value or NULL.
  *
  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
  * insn, the register holding that pointer in the true branch changes state to
@@ -2616,7 +2616,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
                if (dst_reg != BPF_REG_FP) {
                        /* The backtracking logic can only recognize explicit
                         * stack slot address like [fp - 8]. Other spill of
-                        * scalar via different register has to be conervative.
+                        * scalar via different register has to be conservative.
                         * Backtrack from here and mark all registers as precise
                         * that contributed into 'reg' being a constant.
                         */
@@ -9059,7 +9059,7 @@ static int check_return_code(struct bpf_verifier_env *env)
            !prog->aux->attach_func_proto->type)
                return 0;
 
-       /* eBPF calling convetion is such that R0 is used
+       /* eBPF calling convention is such that R0 is used
         * to return the value from eBPF program.
         * Make sure that it's readable at this time
         * of bpf_exit, which means that program wrote
@@ -9850,7 +9850,7 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
  * Since the verifier pushes the branch states as it sees them while exploring
  * the program the condition of walking the branch instruction for the second
  * time means that all states below this branch were already explored and
- * their final liveness markes are already propagated.
+ * their final liveness marks are already propagated.
  * Hence when the verifier completes the search of state list in is_state_visited()
  * we can call this clean_live_states() function to mark all liveness states
  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
@@ -12470,7 +12470,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
                        prog->aux->max_pkt_offset = MAX_PACKET_OFF;
 
                        /* mark bpf_tail_call as different opcode to avoid
-                        * conditional branch in the interpeter for every normal
+                        * conditional branch in the interpreter for every normal
                         * call and to prevent accidental JITing by JIT compiler
                         * that doesn't support bpf_tail_call yet
                         */
index 239de13..0b13d81 100644 (file)
@@ -3931,6 +3931,23 @@ void xdp_do_flush(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush);
 
+void bpf_clear_redirect_map(struct bpf_map *map)
+{
+       struct bpf_redirect_info *ri;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               ri = per_cpu_ptr(&bpf_redirect_info, cpu);
+               /* Avoid polluting remote cacheline due to writes if
+                * not needed. Once we pass this test, we need the
+                * cmpxchg() to make sure it hasn't been changed in
+                * the meantime by remote CPU.
+                */
+               if (unlikely(READ_ONCE(ri->map) == map))
+                       cmpxchg(&ri->map, map, NULL);
+       }
+}
+
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
                    struct bpf_prog *xdp_prog)
 {
@@ -3938,6 +3955,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
        enum bpf_map_type map_type = ri->map_type;
        void *fwd = ri->tgt_value;
        u32 map_id = ri->map_id;
+       struct bpf_map *map;
        int err;
 
        ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
@@ -3947,7 +3965,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
        case BPF_MAP_TYPE_DEVMAP:
                fallthrough;
        case BPF_MAP_TYPE_DEVMAP_HASH:
-               err = dev_map_enqueue(fwd, xdp, dev);
+               map = READ_ONCE(ri->map);
+               if (unlikely(map)) {
+                       WRITE_ONCE(ri->map, NULL);
+                       err = dev_map_enqueue_multi(xdp, dev, map,
+                                                   ri->flags & BPF_F_EXCLUDE_INGRESS);
+               } else {
+                       err = dev_map_enqueue(fwd, xdp, dev);
+               }
                break;
        case BPF_MAP_TYPE_CPUMAP:
                err = cpu_map_enqueue(fwd, xdp, dev);
@@ -3989,13 +4014,21 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
                                       enum bpf_map_type map_type, u32 map_id)
 {
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+       struct bpf_map *map;
        int err;
 
        switch (map_type) {
        case BPF_MAP_TYPE_DEVMAP:
                fallthrough;
        case BPF_MAP_TYPE_DEVMAP_HASH:
-               err = dev_map_generic_redirect(fwd, skb, xdp_prog);
+               map = READ_ONCE(ri->map);
+               if (unlikely(map)) {
+                       WRITE_ONCE(ri->map, NULL);
+                       err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
+                                                    ri->flags & BPF_F_EXCLUDE_INGRESS);
+               } else {
+                       err = dev_map_generic_redirect(fwd, skb, xdp_prog);
+               }
                if (unlikely(err))
                        goto err;
                break;
@@ -10012,11 +10045,13 @@ out:
 static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
                                    struct sock_reuseport *reuse,
                                    struct sock *sk, struct sk_buff *skb,
+                                   struct sock *migrating_sk,
                                    u32 hash)
 {
        reuse_kern->skb = skb;
        reuse_kern->sk = sk;
        reuse_kern->selected_sk = NULL;
+       reuse_kern->migrating_sk = migrating_sk;
        reuse_kern->data_end = skb->data + skb_headlen(skb);
        reuse_kern->hash = hash;
        reuse_kern->reuseport_id = reuse->reuseport_id;
@@ -10025,12 +10060,13 @@ static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
 
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
                                  struct bpf_prog *prog, struct sk_buff *skb,
+                                 struct sock *migrating_sk,
                                  u32 hash)
 {
        struct sk_reuseport_kern reuse_kern;
        enum sk_action action;
 
-       bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
+       bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
        action = BPF_PROG_RUN(prog, &reuse_kern);
 
        if (action == SK_PASS)
@@ -10140,6 +10176,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
                return &sk_reuseport_load_bytes_proto;
        case BPF_FUNC_skb_load_bytes_relative:
                return &sk_reuseport_load_bytes_relative_proto;
+       case BPF_FUNC_get_socket_cookie:
+               return &bpf_get_socket_ptr_cookie_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -10169,6 +10207,14 @@ sk_reuseport_is_valid_access(int off, int size,
        case offsetof(struct sk_reuseport_md, hash):
                return size == size_default;
 
+       case offsetof(struct sk_reuseport_md, sk):
+               info->reg_type = PTR_TO_SOCKET;
+               return size == sizeof(__u64);
+
+       case offsetof(struct sk_reuseport_md, migrating_sk):
+               info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+               return size == sizeof(__u64);
+
        /* Fields that allow narrowing */
        case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
                if (size < sizeof_field(struct sk_buff, protocol))
@@ -10241,6 +10287,14 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
        case offsetof(struct sk_reuseport_md, bind_inany):
                SK_REUSEPORT_LOAD_FIELD(bind_inany);
                break;
+
+       case offsetof(struct sk_reuseport_md, sk):
+               SK_REUSEPORT_LOAD_FIELD(sk);
+               break;
+
+       case offsetof(struct sk_reuseport_md, migrating_sk):
+               SK_REUSEPORT_LOAD_FIELD(migrating_sk);
+               break;
        }
 
        return insn - insn_buf;
index b065f0a..de5ee3a 100644 (file)
 DEFINE_SPINLOCK(reuseport_lock);
 
 static DEFINE_IDA(reuseport_ida);
+static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
+                              struct sock_reuseport *reuse, bool bind_inany);
+
+static int reuseport_sock_index(struct sock *sk,
+                               const struct sock_reuseport *reuse,
+                               bool closed)
+{
+       int left, right;
+
+       if (!closed) {
+               left = 0;
+               right = reuse->num_socks;
+       } else {
+               left = reuse->max_socks - reuse->num_closed_socks;
+               right = reuse->max_socks;
+       }
+
+       for (; left < right; left++)
+               if (reuse->socks[left] == sk)
+                       return left;
+       return -1;
+}
+
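The two index ranges above reflect the socks[] layout this series introduces: listening sockets fill the array from the front, closed-but-migratable sockets are parked at the tail, and both sections grow toward the unused middle (rough editorial sketch):

    socks[0 .. num_socks-1]                            listening section
    socks[max_socks-num_closed_socks .. max_socks-1]   closed section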
+static void __reuseport_add_sock(struct sock *sk,
+                                struct sock_reuseport *reuse)
+{
+       reuse->socks[reuse->num_socks] = sk;
+       /* paired with smp_rmb() in reuseport_(select|migrate)_sock() */
+       smp_wmb();
+       reuse->num_socks++;
+}
+
+static bool __reuseport_detach_sock(struct sock *sk,
+                                   struct sock_reuseport *reuse)
+{
+       int i = reuseport_sock_index(sk, reuse, false);
+
+       if (i == -1)
+               return false;
+
+       reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
+       reuse->num_socks--;
+
+       return true;
+}
+
+static void __reuseport_add_closed_sock(struct sock *sk,
+                                       struct sock_reuseport *reuse)
+{
+       reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk;
+       /* paired with READ_ONCE() in inet_csk_bind_conflict() */
+       WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1);
+}
+
+static bool __reuseport_detach_closed_sock(struct sock *sk,
+                                          struct sock_reuseport *reuse)
+{
+       int i = reuseport_sock_index(sk, reuse, true);
+
+       if (i == -1)
+               return false;
+
+       reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
+       /* paired with READ_ONCE() in inet_csk_bind_conflict() */
+       WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1);
+
+       return true;
+}
 
 static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 {
@@ -49,6 +117,12 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
        reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock));
        if (reuse) {
+               if (reuse->num_closed_socks) {
+                       /* sk was shutdown()ed before */
+                       ret = reuseport_resurrect(sk, reuse, NULL, bind_inany);
+                       goto out;
+               }
+
                /* Only set reuse->bind_inany if the bind_inany is true.
                 * Otherwise, it will overwrite the reuse->bind_inany
                 * which was set by the bind/hash path.
@@ -72,9 +146,9 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
        }
 
        reuse->reuseport_id = id;
+       reuse->bind_inany = bind_inany;
        reuse->socks[0] = sk;
        reuse->num_socks = 1;
-       reuse->bind_inany = bind_inany;
        rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
 out:
@@ -90,14 +164,30 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
        u32 more_socks_size, i;
 
        more_socks_size = reuse->max_socks * 2U;
-       if (more_socks_size > U16_MAX)
+       if (more_socks_size > U16_MAX) {
+               if (reuse->num_closed_socks) {
+                       /* Make room by removing a closed sk.
+                        * The child has already been migrated.
+                        * Only reqsk left at this point.
+                        */
+                       struct sock *sk;
+
+                       sk = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
+                       RCU_INIT_POINTER(sk->sk_reuseport_cb, NULL);
+                       __reuseport_detach_closed_sock(sk, reuse);
+
+                       return reuse;
+               }
+
                return NULL;
+       }
 
        more_reuse = __reuseport_alloc(more_socks_size);
        if (!more_reuse)
                return NULL;
 
        more_reuse->num_socks = reuse->num_socks;
+       more_reuse->num_closed_socks = reuse->num_closed_socks;
        more_reuse->prog = reuse->prog;
        more_reuse->reuseport_id = reuse->reuseport_id;
        more_reuse->bind_inany = reuse->bind_inany;
@@ -105,9 +195,13 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 
        memcpy(more_reuse->socks, reuse->socks,
               reuse->num_socks * sizeof(struct sock *));
+       memcpy(more_reuse->socks +
+              (more_reuse->max_socks - more_reuse->num_closed_socks),
+              reuse->socks + (reuse->max_socks - reuse->num_closed_socks),
+              reuse->num_closed_socks * sizeof(struct sock *));
        more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);
 
-       for (i = 0; i < reuse->num_socks; ++i)
+       for (i = 0; i < reuse->max_socks; ++i)
                rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
                                   more_reuse);
 
@@ -152,13 +246,21 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
        reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock));
        old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
-                                            lockdep_is_held(&reuseport_lock));
+                                             lockdep_is_held(&reuseport_lock));
+       if (old_reuse && old_reuse->num_closed_socks) {
+               /* sk was shutdown()ed before */
+               int err = reuseport_resurrect(sk, old_reuse, reuse, reuse->bind_inany);
+
+               spin_unlock_bh(&reuseport_lock);
+               return err;
+       }
+
        if (old_reuse && old_reuse->num_socks != 1) {
                spin_unlock_bh(&reuseport_lock);
                return -EBUSY;
        }
 
-       if (reuse->num_socks == reuse->max_socks) {
+       if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
                reuse = reuseport_grow(reuse);
                if (!reuse) {
                        spin_unlock_bh(&reuseport_lock);
@@ -166,10 +268,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
                }
        }
 
-       reuse->socks[reuse->num_socks] = sk;
-       /* paired with smp_rmb() in reuseport_select_sock() */
-       smp_wmb();
-       reuse->num_socks++;
+       __reuseport_add_sock(sk, reuse);
        rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
        spin_unlock_bh(&reuseport_lock);
@@ -180,15 +279,77 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 }
 EXPORT_SYMBOL(reuseport_add_sock);
 
+static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
+                              struct sock_reuseport *reuse, bool bind_inany)
+{
+       if (old_reuse == reuse) {
+               /* If sk was in the same reuseport group, just pop sk out of
+                * the closed section and push sk into the listening section.
+                */
+               __reuseport_detach_closed_sock(sk, old_reuse);
+               __reuseport_add_sock(sk, old_reuse);
+               return 0;
+       }
+
+       if (!reuse) {
+               /* In bind()/listen() path, we cannot carry over the eBPF prog
+                * for the shutdown()ed socket. In setsockopt() path, we should
+                * not change the eBPF prog of listening sockets by attaching a
+                * prog to the shutdown()ed socket. Thus, we will allocate a new
+                * reuseport group and detach sk from the old group.
+                */
+               int id;
+
+               reuse = __reuseport_alloc(INIT_SOCKS);
+               if (!reuse)
+                       return -ENOMEM;
+
+               id = ida_alloc(&reuseport_ida, GFP_ATOMIC);
+               if (id < 0) {
+                       kfree(reuse);
+                       return id;
+               }
+
+               reuse->reuseport_id = id;
+               reuse->bind_inany = bind_inany;
+       } else {
+               /* Move sk from the old group to the new one if
+                * - all the other listeners in the old group were close()d or
+                *   shutdown()ed, and then sk2 has listen()ed on the same port
+                * OR
+                * - sk listen()ed without bind() (or with autobind), was
+                *   shutdown()ed, and then listen()s on another port which
+                *   sk2 listen()s on.
+                */
+               if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
+                       reuse = reuseport_grow(reuse);
+                       if (!reuse)
+                               return -ENOMEM;
+               }
+       }
+
+       __reuseport_detach_closed_sock(sk, old_reuse);
+       __reuseport_add_sock(sk, reuse);
+       rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+       if (old_reuse->num_socks + old_reuse->num_closed_socks == 0)
+               call_rcu(&old_reuse->rcu, reuseport_free_rcu);
+
+       return 0;
+}
+
 void reuseport_detach_sock(struct sock *sk)
 {
        struct sock_reuseport *reuse;
-       int i;
 
        spin_lock_bh(&reuseport_lock);
        reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock));
 
+       /* reuseport_grow() has detached a closed sk */
+       if (!reuse)
+               goto out;
+
        /* Notify the bpf side. The sk may be added to a sockarray
         * map. If so, sockarray logic will remove it from the map.
         *
@@ -201,19 +362,52 @@ void reuseport_detach_sock(struct sock *sk)
 
        rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
 
-       for (i = 0; i < reuse->num_socks; i++) {
-               if (reuse->socks[i] == sk) {
-                       reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
-                       reuse->num_socks--;
-                       if (reuse->num_socks == 0)
-                               call_rcu(&reuse->rcu, reuseport_free_rcu);
-                       break;
-               }
-       }
+       if (!__reuseport_detach_closed_sock(sk, reuse))
+               __reuseport_detach_sock(sk, reuse);
+
+       if (reuse->num_socks + reuse->num_closed_socks == 0)
+               call_rcu(&reuse->rcu, reuseport_free_rcu);
+
+out:
        spin_unlock_bh(&reuseport_lock);
 }
 EXPORT_SYMBOL(reuseport_detach_sock);
 
+void reuseport_stop_listen_sock(struct sock *sk)
+{
+       if (sk->sk_protocol == IPPROTO_TCP) {
+               struct sock_reuseport *reuse;
+               struct bpf_prog *prog;
+
+               spin_lock_bh(&reuseport_lock);
+
+               reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+                                                 lockdep_is_held(&reuseport_lock));
+               prog = rcu_dereference_protected(reuse->prog,
+                                                lockdep_is_held(&reuseport_lock));
+
+               if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+                   (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
+                       /* Migration capable, move sk from the listening section
+                        * to the closed section.
+                        */
+                       bpf_sk_reuseport_detach(sk);
+
+                       __reuseport_detach_sock(sk, reuse);
+                       __reuseport_add_closed_sock(sk, reuse);
+
+                       spin_unlock_bh(&reuseport_lock);
+                       return;
+               }
+
+               spin_unlock_bh(&reuseport_lock);
+       }
+
+       /* Not capable of migration; detach immediately */
+       reuseport_detach_sock(sk);
+}
+EXPORT_SYMBOL(reuseport_stop_listen_sock);
+
 static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
                                   struct bpf_prog *prog, struct sk_buff *skb,
                                   int hdr_len)
@@ -244,6 +438,23 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
        return reuse->socks[index];
 }
 
+static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse,
+                                                 u32 hash, u16 num_socks)
+{
+       int i, j;
+
+       i = j = reciprocal_scale(hash, num_socks);
+       while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+               i++;
+               if (i >= num_socks)
+                       i = 0;
+               if (i == j)
+                       return NULL;
+       }
+
+       return reuse->socks[i];
+}
+
 /**
  *  reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
  *  @sk: First socket in the group.
@@ -274,32 +485,21 @@ struct sock *reuseport_select_sock(struct sock *sk,
        prog = rcu_dereference(reuse->prog);
        socks = READ_ONCE(reuse->num_socks);
        if (likely(socks)) {
-               /* paired with smp_wmb() in reuseport_add_sock() */
+               /* paired with smp_wmb() in __reuseport_add_sock() */
                smp_rmb();
 
                if (!prog || !skb)
                        goto select_by_hash;
 
                if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
-                       sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
+                       sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, NULL, hash);
                else
                        sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);
 
 select_by_hash:
                /* no bpf or invalid bpf result: fall back to hash usage */
-               if (!sk2) {
-                       int i, j;
-
-                       i = j = reciprocal_scale(hash, socks);
-                       while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
-                               i++;
-                               if (i >= socks)
-                                       i = 0;
-                               if (i == j)
-                                       goto out;
-                       }
-                       sk2 = reuse->socks[i];
-               }
+               if (!sk2)
+                       sk2 = reuseport_select_sock_by_hash(reuse, hash, socks);
        }
 
 out:
@@ -308,14 +508,84 @@ out:
 }
 EXPORT_SYMBOL(reuseport_select_sock);
 
+/**
+ *  reuseport_migrate_sock - Select a socket from an SO_REUSEPORT group.
+ *  @sk: close()ed or shutdown()ed socket in the group.
+ *  @migrating_sk: ESTABLISHED/SYN_RECV full socket in the accept queue or
+ *    NEW_SYN_RECV request socket during 3WHS.
+ *  @skb: skb to run through BPF filter.
+ *  Returns a socket (with sk_refcnt +1) that should accept the child socket
+ *  (or NULL on error).
+ */
+struct sock *reuseport_migrate_sock(struct sock *sk,
+                                   struct sock *migrating_sk,
+                                   struct sk_buff *skb)
+{
+       struct sock_reuseport *reuse;
+       struct sock *nsk = NULL;
+       bool allocated = false;
+       struct bpf_prog *prog;
+       u16 socks;
+       u32 hash;
+
+       rcu_read_lock();
+
+       reuse = rcu_dereference(sk->sk_reuseport_cb);
+       if (!reuse)
+               goto out;
+
+       socks = READ_ONCE(reuse->num_socks);
+       if (unlikely(!socks))
+               goto out;
+
+       /* paired with smp_wmb() in __reuseport_add_sock() */
+       smp_rmb();
+
+       hash = migrating_sk->sk_hash;
+       prog = rcu_dereference(reuse->prog);
+       if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
+               if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+                       goto select_by_hash;
+               goto out;
+       }
+
+       if (!skb) {
+               skb = alloc_skb(0, GFP_ATOMIC);
+               if (!skb)
+                       goto out;
+               allocated = true;
+       }
+
+       nsk = bpf_run_sk_reuseport(reuse, sk, prog, skb, migrating_sk, hash);
+
+       if (allocated)
+               kfree_skb(skb);
+
+select_by_hash:
+       if (!nsk)
+               nsk = reuseport_select_sock_by_hash(reuse, hash, socks);
+
+       if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
+               nsk = NULL;
+
+out:
+       rcu_read_unlock();
+       return nsk;
+}
+EXPORT_SYMBOL(reuseport_migrate_sock);
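Migration is triggered either by the net.ipv4.tcp_migrate_req sysctl added later in this diff or by a BPF_SK_REUSEPORT_SELECT_OR_MIGRATE program. A hedged sketch of such a program, assuming the SEC("sk_reuseport/migrate") convention used by the accompanying libbpf and selftest changes:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
            __uint(max_entries, 256);
            __type(key, int);
            __type(value, __u64);
    } reuseport_map SEC(".maps");

    SEC("sk_reuseport/migrate")
    int select_or_migrate(struct sk_reuseport_md *md)
    {
            int key = 0;

            /* migrating_sk is NULL for ordinary SYN-time selection and
             * non-NULL when a child or request socket is being moved off
             * a close()d or shutdown()ed listener.
             */
            if (!md->migrating_sk)
                    return SK_PASS; /* fall back to hash-based selection */

            if (bpf_sk_select_reuseport(md, &reuseport_map, &key, 0))
                    return SK_DROP; /* refuse this migration */

            return SK_PASS;
    }

    char _license[] SEC("license") = "GPL";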
+
 int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
 {
        struct sock_reuseport *reuse;
        struct bpf_prog *old_prog;
 
-       if (sk_unhashed(sk) && sk->sk_reuseport) {
-               int err = reuseport_alloc(sk, false);
+       if (sk_unhashed(sk)) {
+               int err;
 
+               if (!sk->sk_reuseport)
+                       return -EINVAL;
+
+               err = reuseport_alloc(sk, false);
                if (err)
                        return err;
        } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
@@ -341,13 +611,24 @@ int reuseport_detach_prog(struct sock *sk)
        struct sock_reuseport *reuse;
        struct bpf_prog *old_prog;
 
-       if (!rcu_access_pointer(sk->sk_reuseport_cb))
-               return sk->sk_reuseport ? -ENOENT : -EINVAL;
-
        old_prog = NULL;
        spin_lock_bh(&reuseport_lock);
        reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock));
+
+       /* reuse must be checked after acquiring the reuseport_lock
+        * because reuseport_grow() can detach a closed sk.
+        */
+       if (!reuse) {
+               spin_unlock_bh(&reuseport_lock);
+               return sk->sk_reuseport ? -ENOENT : -EINVAL;
+       }
+
+       if (sk_unhashed(sk) && reuse->num_closed_socks) {
+               spin_unlock_bh(&reuseport_lock);
+               return -ENOENT;
+       }
+
        old_prog = rcu_replace_pointer(reuse->prog, old_prog,
                                       lockdep_is_held(&reuseport_lock));
        spin_unlock_bh(&reuseport_lock);
index 858276e..725d20f 100644 (file)
@@ -584,3 +584,31 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
        return __xdp_build_skb_from_frame(xdpf, skb, dev);
 }
 EXPORT_SYMBOL_GPL(xdp_build_skb_from_frame);
+
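+/* Deep-copy an xdp_frame (the struct itself, headroom and data) into a
+ * freshly allocated order-0 page; the devmap broadcast path uses this to
+ * hand each destination its own private copy of the frame.
+ */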
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
+{
+       unsigned int headroom, totalsize;
+       struct xdp_frame *nxdpf;
+       struct page *page;
+       void *addr;
+
+       headroom = xdpf->headroom + sizeof(*xdpf);
+       totalsize = headroom + xdpf->len;
+
+       if (unlikely(totalsize > PAGE_SIZE))
+               return NULL;
+       page = dev_alloc_page();
+       if (!page)
+               return NULL;
+       addr = page_to_virt(page);
+
+       memcpy(addr, xdpf, totalsize);
+
+       nxdpf = addr;
+       nxdpf->data = addr + headroom;
+       nxdpf->frame_sz = PAGE_SIZE;
+       nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
+       nxdpf->mem.id = 0;
+
+       return nxdpf;
+}
index fd472ea..0eea878 100644 (file)
@@ -135,10 +135,18 @@ static int inet_csk_bind_conflict(const struct sock *sk,
                                  bool relax, bool reuseport_ok)
 {
        struct sock *sk2;
+       bool reuseport_cb_ok;
        bool reuse = sk->sk_reuse;
        bool reuseport = !!sk->sk_reuseport;
+       struct sock_reuseport *reuseport_cb;
        kuid_t uid = sock_i_uid((struct sock *)sk);
 
+       rcu_read_lock();
+       reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
+       /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
+       reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
+       rcu_read_unlock();
+
        /*
         * Unlike other sk lookup places we do not check
         * for sk_net here, since _all_ the socks listed
@@ -156,14 +164,14 @@ static int inet_csk_bind_conflict(const struct sock *sk,
                                if ((!relax ||
                                     (!reuseport_ok &&
                                      reuseport && sk2->sk_reuseport &&
-                                     !rcu_access_pointer(sk->sk_reuseport_cb) &&
+                                     reuseport_cb_ok &&
                                      (sk2->sk_state == TCP_TIME_WAIT ||
                                       uid_eq(uid, sock_i_uid(sk2))))) &&
                                    inet_rcv_saddr_equal(sk, sk2, true))
                                        break;
                        } else if (!reuseport_ok ||
                                   !reuseport || !sk2->sk_reuseport ||
-                                  rcu_access_pointer(sk->sk_reuseport_cb) ||
+                                  !reuseport_cb_ok ||
                                   (sk2->sk_state != TCP_TIME_WAIT &&
                                    !uid_eq(uid, sock_i_uid(sk2)))) {
                                if (inet_rcv_saddr_equal(sk, sk2, true))
@@ -687,6 +695,64 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_rtx_syn_ack);
 
+static struct request_sock *inet_reqsk_clone(struct request_sock *req,
+                                            struct sock *sk)
+{
+       struct sock *req_sk, *nreq_sk;
+       struct request_sock *nreq;
+
+       nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
+       if (!nreq) {
+               /* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
+               sock_put(sk);
+               return NULL;
+       }
+
+       req_sk = req_to_sk(req);
+       nreq_sk = req_to_sk(nreq);
+
+       memcpy(nreq_sk, req_sk,
+              offsetof(struct sock, sk_dontcopy_begin));
+       memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
+              req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
+
+       sk_node_init(&nreq_sk->sk_node);
+       nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
+#ifdef CONFIG_XPS
+       nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
+#endif
+       nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;
+
+       nreq->rsk_listener = sk;
+
+       /* We need not acquire fastopenq->lock
+        * because the child socket is locked in inet_csk_listen_stop().
+        */
+       if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener)
+               rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq);
+
+       return nreq;
+}
+
+static void reqsk_queue_migrated(struct request_sock_queue *queue,
+                                const struct request_sock *req)
+{
+       if (req->num_timeout == 0)
+               atomic_inc(&queue->young);
+       atomic_inc(&queue->qlen);
+}
+
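+/* A migrated req shares saved_syn and the IP option blobs with its clone;
+ * clear the pointers on whichever copy is being released so the memory is
+ * not freed twice.
+ */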
+static void reqsk_migrate_reset(struct request_sock *req)
+{
+       req->saved_syn = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+       inet_rsk(req)->ipv6_opt = NULL;
+       inet_rsk(req)->pktopts = NULL;
+#else
+       inet_rsk(req)->ireq_opt = NULL;
+#endif
+}
+
 /* return true if req was found in the ehash table */
 static bool reqsk_queue_unlink(struct request_sock *req)
 {
@@ -727,15 +793,39 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
 static void reqsk_timer_handler(struct timer_list *t)
 {
        struct request_sock *req = from_timer(req, t, rsk_timer);
+       struct request_sock *nreq = NULL, *oreq = req;
        struct sock *sk_listener = req->rsk_listener;
-       struct net *net = sock_net(sk_listener);
-       struct inet_connection_sock *icsk = inet_csk(sk_listener);
-       struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+       struct inet_connection_sock *icsk;
+       struct request_sock_queue *queue;
+       struct net *net;
        int max_syn_ack_retries, qlen, expire = 0, resend = 0;
 
-       if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
-               goto drop;
+       if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
+               struct sock *nsk;
+
+               nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
+               if (!nsk)
+                       goto drop;
+
+               nreq = inet_reqsk_clone(req, nsk);
+               if (!nreq)
+                       goto drop;
 
+               /* The new timer for the cloned req can drop the initial
+                * refcount of 2 by calling inet_csk_reqsk_queue_drop_and_put(),
+                * so hold another count to prevent use-after-free and
+                * call reqsk_put() just before return.
+                */
+               refcount_set(&nreq->rsk_refcnt, 2 + 1);
+               timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
+               reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
+
+               req = nreq;
+               sk_listener = nsk;
+       }
+
+       icsk = inet_csk(sk_listener);
+       net = sock_net(sk_listener);
        max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
        /* Normally all the openreqs are young and become mature
         * (i.e. converted to established socket) for first timeout.
@@ -754,6 +844,7 @@ static void reqsk_timer_handler(struct timer_list *t)
         * embrions; and abort old ones without pity, if old
         * ones are about to clog our table.
         */
+       queue = &icsk->icsk_accept_queue;
        qlen = reqsk_queue_len(queue);
        if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
                int young = reqsk_queue_len_young(queue) << 1;
@@ -778,10 +869,36 @@ static void reqsk_timer_handler(struct timer_list *t)
                        atomic_dec(&queue->young);
                timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
                mod_timer(&req->rsk_timer, jiffies + timeo);
+
+               if (!nreq)
+                       return;
+
+               if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
+                       /* delete timer */
+                       inet_csk_reqsk_queue_drop(sk_listener, nreq);
+                       goto drop;
+               }
+
+               reqsk_migrate_reset(oreq);
+               reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
+               reqsk_put(oreq);
+
+               reqsk_put(nreq);
                return;
        }
+
 drop:
-       inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
+       /* Even if we can clone the req, we may not need to retransmit any more
+        * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
+        * CPU may win the "own_req" race so that inet_ehash_insert() fails.
+        */
+       if (nreq) {
+               reqsk_migrate_reset(nreq);
+               reqsk_queue_removed(queue, nreq);
+               __reqsk_free(nreq);
+       }
+
+       inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
 }
 
 static void reqsk_queue_hash_req(struct request_sock *req,
@@ -997,12 +1114,40 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
                                         struct request_sock *req, bool own_req)
 {
        if (own_req) {
-               inet_csk_reqsk_queue_drop(sk, req);
-               reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
-               if (inet_csk_reqsk_queue_add(sk, req, child))
+               inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+               reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
+
+               if (sk != req->rsk_listener) {
+                       /* another listening sk has been selected,
+                        * migrate the req to it.
+                        */
+                       struct request_sock *nreq;
+
+                       /* hold a refcnt for the nreq->rsk_listener
+                        * which is assigned in inet_reqsk_clone()
+                        */
+                       sock_hold(sk);
+                       nreq = inet_reqsk_clone(req, sk);
+                       if (!nreq) {
+                               inet_child_forget(sk, req, child);
+                               goto child_put;
+                       }
+
+                       refcount_set(&nreq->rsk_refcnt, 1);
+                       if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
+                               reqsk_migrate_reset(req);
+                               reqsk_put(req);
+                               return child;
+                       }
+
+                       reqsk_migrate_reset(nreq);
+                       __reqsk_free(nreq);
+               } else if (inet_csk_reqsk_queue_add(sk, req, child)) {
                        return child;
+               }
        }
        /* Too bad, another child took ownership of the request, undo. */
+child_put:
        bh_unlock_sock(child);
        sock_put(child);
        return NULL;
@@ -1028,14 +1173,36 @@ void inet_csk_listen_stop(struct sock *sk)
         * of the variants now.                 --ANK
         */
        while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
-               struct sock *child = req->sk;
+               struct sock *child = req->sk, *nsk;
+               struct request_sock *nreq;
 
                local_bh_disable();
                bh_lock_sock(child);
                WARN_ON(sock_owned_by_user(child));
                sock_hold(child);
 
+               nsk = reuseport_migrate_sock(sk, child, NULL);
+               if (nsk) {
+                       nreq = inet_reqsk_clone(req, nsk);
+                       if (nreq) {
+                               refcount_set(&nreq->rsk_refcnt, 1);
+
+                               if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
+                                       reqsk_migrate_reset(req);
+                               } else {
+                                       reqsk_migrate_reset(nreq);
+                                       __reqsk_free(nreq);
+                               }
+
+                               /* inet_csk_reqsk_queue_add() has already
+                                * called inet_child_forget() on failure case.
+                                */
+                               goto skip_child_forget;
+                       }
+               }
+
                inet_child_forget(sk, req, child);
+skip_child_forget:
                reqsk_put(req);
                bh_unlock_sock(child);
                local_bh_enable();
index c96866a..80aeaf9 100644 (file)
@@ -697,7 +697,7 @@ void inet_unhash(struct sock *sk)
                goto unlock;
 
        if (rcu_access_pointer(sk->sk_reuseport_cb))
-               reuseport_detach_sock(sk);
+               reuseport_stop_listen_sock(sk);
        if (ilb) {
                inet_unhash2(hashinfo, sk);
                ilb->count--;
index 4fa77f1..6f1e64d 100644 (file)
@@ -961,6 +961,15 @@ static struct ctl_table ipv4_net_table[] = {
        },
 #endif
        {
+               .procname       = "tcp_migrate_req",
+               .data           = &init_net.ipv4.sysctl_tcp_migrate_req,
+               .maxlen         = sizeof(u8),
+               .mode           = 0644,
+               .proc_handler   = proc_dou8vec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_ONE
+       },
+       {
                .procname       = "tcp_reordering",
                .data           = &init_net.ipv4.sysctl_tcp_reordering,
                .maxlen         = sizeof(int),
index 4f5b68a..6cb8e26 100644 (file)
@@ -2002,13 +2002,21 @@ process:
                        goto csum_error;
                }
                if (unlikely(sk->sk_state != TCP_LISTEN)) {
-                       inet_csk_reqsk_queue_drop_and_put(sk, req);
-                       goto lookup;
+                       nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+                       if (!nsk) {
+                               inet_csk_reqsk_queue_drop_and_put(sk, req);
+                               goto lookup;
+                       }
+                       sk = nsk;
+                       /* reuseport_migrate_sock() has already taken one sk_refcnt
+                        * before returning.
+                        */
+               } else {
+                       /* We own a reference on the listener, increase it again
+                        * as we might lose it too soon.
+                        */
+                       sock_hold(sk);
                }
-               /* We own a reference on the listener, increase it again
-                * as we might lose it too soon.
-                */
-               sock_hold(sk);
                refcounted = true;
                nsk = NULL;
                if (!tcp_filter(sk, skb)) {
index 7513ba4..f258a4c 100644 (file)
@@ -775,8 +775,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                goto listen_overflow;
 
        if (own_req && rsk_drop_req(req)) {
-               reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
-               inet_csk_reqsk_queue_drop_and_put(sk, req);
+               reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
+               inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
                return child;
        }
 
index 4435fa3..4d71464 100644 (file)
@@ -1664,10 +1664,18 @@ process:
                        goto csum_error;
                }
                if (unlikely(sk->sk_state != TCP_LISTEN)) {
-                       inet_csk_reqsk_queue_drop_and_put(sk, req);
-                       goto lookup;
+                       nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+                       if (!nsk) {
+                               inet_csk_reqsk_queue_drop_and_put(sk, req);
+                               goto lookup;
+                       }
+                       sk = nsk;
+                       /* reuseport_migrate_sock() already holds one sk_refcnt
+                        * on the returned socket.
+                        */
+               } else {
+                       sock_hold(sk);
                }
-               sock_hold(sk);
                refcounted = true;
                nsk = NULL;
                if (!tcp_filter(sk, skb)) {
index 56a28a6..f01ef6b 100644 (file)
@@ -27,7 +27,7 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
        unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
 
-       kfree(umem->pgs);
+       kvfree(umem->pgs);
        umem->pgs = NULL;
 }
 
@@ -99,8 +99,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
        long npgs;
        int err;
 
-       umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
-                           GFP_KERNEL | __GFP_NOWARN);
+       umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
        if (!umem->pgs)
                return -ENOMEM;
 
@@ -123,7 +122,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
 out_pin:
        xdp_umem_unpin_pages(umem);
 out_pgs:
-       kfree(umem->pgs);
+       kvfree(umem->pgs);
        umem->pgs = NULL;
        return err;
 }
index 67b4ce5..9df75ea 100644 (file)
@@ -226,7 +226,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
 
 static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-       return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
+       return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+                                     __xsk_map_lookup_elem);
 }
 
 void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
index 45ceca4..520434e 100644 (file)
@@ -41,6 +41,7 @@ tprogs-y += test_map_in_map
 tprogs-y += per_socket_stats_example
 tprogs-y += xdp_redirect
 tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect_map_multi
 tprogs-y += xdp_redirect_cpu
 tprogs-y += xdp_monitor
 tprogs-y += xdp_rxq_info
@@ -99,6 +100,7 @@ test_map_in_map-objs := test_map_in_map_user.o
 per_socket_stats_example-objs := cookie_uid_helper_example.o
 xdp_redirect-objs := xdp_redirect_user.o
 xdp_redirect_map-objs := xdp_redirect_map_user.o
+xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o
 xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
 xdp_monitor-objs := xdp_monitor_user.o
 xdp_rxq_info-objs := xdp_rxq_info_user.o
@@ -160,6 +162,7 @@ always-y += tcp_tos_reflect_kern.o
 always-y += tcp_dumpstats_kern.o
 always-y += xdp_redirect_kern.o
 always-y += xdp_redirect_map_kern.o
+always-y += xdp_redirect_map_multi_kern.o
 always-y += xdp_redirect_cpu_kern.o
 always-y += xdp_monitor_kern.o
 always-y += xdp_rxq_info_kern.o
index 26dcd4d..9b19323 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 
-/**
+/*
  * ibumad BPF sample kernel side
  *
  * This program is free software; you can redistribute it and/or
index d83d810..0746ca5 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 
-/**
+/*
  * ibumad BPF sample user side
  *
  * This program is free software; you can redistribute it and/or
index 74a4583..0006126 100644 (file)
@@ -67,6 +67,8 @@ static void usage(const char *prog)
                "usage: %s [OPTS] interface-list\n"
                "\nOPTS:\n"
                "    -d    detach program\n"
+               "    -S    use skb-mode\n"
+               "    -F    force loading prog\n"
                "    -D    direct table lookups (skip fib rules)\n",
                prog);
 }
diff --git a/samples/bpf/xdp_redirect_map_multi_kern.c b/samples/bpf/xdp_redirect_map_multi_kern.c
new file mode 100644 (file)
index 0000000..71aa23d
--- /dev/null
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+       __uint(max_entries, 32);
+} forward_map_general SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 32);
+} forward_map_native SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+       __type(key, u32);
+       __type(value, long);
+       __uint(max_entries, 1);
+} rxcnt SEC(".maps");
+
+/* map to store egress interface mac addresses; set
+ * max_entries to 1 and extend it in the user space prog.
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, u32);
+       __type(value, __be64);
+       __uint(max_entries, 1);
+} mac_map SEC(".maps");
+
+static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map)
+{
+       long *value;
+       u32 key = 0;
+
+       /* count packet in global counter */
+       value = bpf_map_lookup_elem(&rxcnt, &key);
+       if (value)
+               *value += 1;
+
+       return bpf_redirect_map(forward_map, key,
+                               BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp_redirect_general")
+int xdp_redirect_map_general(struct xdp_md *ctx)
+{
+       return xdp_redirect_map(ctx, &forward_map_general);
+}
+
+SEC("xdp_redirect_native")
+int xdp_redirect_map_native(struct xdp_md *ctx)
+{
+       return xdp_redirect_map(ctx, &forward_map_native);
+}
+
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       u32 key = ctx->egress_ifindex;
+       struct ethhdr *eth = data;
+       __be64 *mac;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       mac = bpf_map_lookup_elem(&mac_map, &key);
+       if (mac)
+               __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c
new file mode 100644 (file)
index 0000000..84cdbbe
--- /dev/null
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+static int rxcnt_map_fd;
+
+static void int_exit(int sig)
+{
+       __u32 prog_id = 0;
+       int i;
+
+       for (i = 0; ifaces[i] > 0; i++) {
+               if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+                       printf("bpf_get_link_xdp_id failed\n");
+                       exit(1);
+               }
+               if (prog_id)
+                       bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+       }
+
+       exit(0);
+}
+
+static void poll_stats(int interval)
+{
+       unsigned int nr_cpus = bpf_num_possible_cpus();
+       __u64 values[nr_cpus], prev[nr_cpus];
+
+       memset(prev, 0, sizeof(prev));
+
+       while (1) {
+               __u64 sum = 0;
+               __u32 key = 0;
+               int i;
+
+               sleep(interval);
+               assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
+               for (i = 0; i < nr_cpus; i++)
+                       sum += (values[i] - prev[i]);
+               if (sum)
+                       printf("Forwarding %10llu pkt/s\n", sum / interval);
+               memcpy(prev, values, sizeof(values));
+       }
+}
+
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+       char ifname[IF_NAMESIZE];
+       struct ifreq ifr;
+       int fd, ret = -1;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0)
+               return ret;
+
+       if (!if_indextoname(ifindex, ifname))
+               goto err_out;
+
+       strcpy(ifr.ifr_name, ifname);
+
+       if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
+               goto err_out;
+
+       memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+       ret = 0;
+
+err_out:
+       close(fd);
+       return ret;
+}
+
+static int update_mac_map(struct bpf_object *obj)
+{
+       int i, ret = -1, mac_map_fd;
+       unsigned char mac_addr[6];
+       unsigned int ifindex;
+
+       mac_map_fd = bpf_object__find_map_fd_by_name(obj, "mac_map");
+       if (mac_map_fd < 0) {
+               printf("find mac map fd failed\n");
+               return ret;
+       }
+
+       for (i = 0; ifaces[i] > 0; i++) {
+               ifindex = ifaces[i];
+
+               ret = get_mac_addr(ifindex, mac_addr);
+               if (ret < 0) {
+                       printf("get interface %d mac failed\n", ifindex);
+                       return ret;
+               }
+
+               ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0);
+               if (ret) {
+                       perror("bpf_update_elem mac_map_fd");
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static void usage(const char *prog)
+{
+       fprintf(stderr,
+               "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
+               "OPTS:\n"
+               "    -S    use skb-mode\n"
+               "    -N    enforce native mode\n"
+               "    -F    force loading prog\n"
+               "    -X    load xdp program on egress\n",
+               prog);
+}
+
+int main(int argc, char **argv)
+{
+       int i, ret, opt, forward_map_fd, max_ifindex = 0;
+       struct bpf_program *ingress_prog, *egress_prog;
+       int ingress_prog_fd, egress_prog_fd = 0;
+       struct bpf_devmap_val devmap_val;
+       bool attach_egress_prog = false;
+       char ifname[IF_NAMESIZE];
+       struct bpf_map *mac_map;
+       struct bpf_object *obj;
+       unsigned int ifindex;
+       char filename[256];
+
+       while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+               switch (opt) {
+               case 'S':
+                       xdp_flags |= XDP_FLAGS_SKB_MODE;
+                       break;
+               case 'N':
+                       /* default, set below */
+                       break;
+               case 'F':
+                       xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+                       break;
+               case 'X':
+                       attach_egress_prog = true;
+                       break;
+               default:
+                       usage(basename(argv[0]));
+                       return 1;
+               }
+       }
+
+       if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+               xdp_flags |= XDP_FLAGS_DRV_MODE;
+       } else if (attach_egress_prog) {
+               printf("Loading XDP program on egress is not supported in SKB mode yet\n");
+               return 1;
+       }
+
+       if (optind == argc) {
+               printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+               return 1;
+       }
+
+       printf("Get interfaces");
+       for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+               ifaces[i] = if_nametoindex(argv[optind + i]);
+               if (!ifaces[i])
+                       ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+               if (!if_indextoname(ifaces[i], ifname)) {
+                       perror("Invalid interface name or index");
+                       return 1;
+               }
+
+               /* Find the largest index number */
+               if (ifaces[i] > max_ifindex)
+                       max_ifindex = ifaces[i];
+
+               printf(" %d", ifaces[i]);
+       }
+       printf("\n");
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       obj = bpf_object__open(filename);
+       if (libbpf_get_error(obj)) {
+               printf("ERROR: opening BPF object file failed\n");
+               obj = NULL;
+               goto err_out;
+       }
+
+       /* Reset the map size to max ifindex + 1 */
+       if (attach_egress_prog) {
+               mac_map = bpf_object__find_map_by_name(obj, "mac_map");
+               ret = bpf_map__resize(mac_map, max_ifindex + 1);
+               if (ret < 0) {
+                       printf("ERROR: reset mac map size failed\n");
+                       goto err_out;
+               }
+       }
+
+       /* load BPF program */
+       if (bpf_object__load(obj)) {
+               printf("ERROR: loading BPF object file failed\n");
+               goto err_out;
+       }
+
+       if (xdp_flags & XDP_FLAGS_SKB_MODE) {
+               ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general");
+               forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_general");
+       } else {
+               ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native");
+               forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_native");
+       }
+       if (!ingress_prog || forward_map_fd < 0) {
+               printf("finding ingress_prog/forward_map in obj file failed\n");
+               goto err_out;
+       }
+
+       ingress_prog_fd = bpf_program__fd(ingress_prog);
+       if (ingress_prog_fd < 0) {
+               printf("find ingress_prog fd failed\n");
+               goto err_out;
+       }
+
+       rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
+       if (rxcnt_map_fd < 0) {
+               printf("bpf_object__find_map_fd_by_name failed\n");
+               goto err_out;
+       }
+
+       if (attach_egress_prog) {
+               /* Update mac_map with all egress interfaces' mac addr */
+               if (update_mac_map(obj) < 0) {
+                       printf("Error: update mac map failed\n");
+                       goto err_out;
+               }
+
+               /* Find egress prog fd */
+               egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+               if (!egress_prog) {
+                       printf("finding egress_prog in obj file failed\n");
+                       goto err_out;
+               }
+               egress_prog_fd = bpf_program__fd(egress_prog);
+               if (egress_prog_fd < 0) {
+                       printf("find egress_prog fd failed\n");
+                       goto err_out;
+               }
+       }
+
+       /* Remove the attached program when this program is interrupted or killed */
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+
+       /* Init forward multicast groups */
+       for (i = 0; ifaces[i] > 0; i++) {
+               ifindex = ifaces[i];
+
+               /* bind prog_fd to each interface */
+               ret = bpf_set_link_xdp_fd(ifindex, ingress_prog_fd, xdp_flags);
+               if (ret) {
+                       printf("Set xdp fd failed on %d\n", ifindex);
+                       goto err_out;
+               }
+
+               /* Add all the interfaces to the forward group and attach
+                * the egress devmap program if it exists
+                */
+               devmap_val.ifindex = ifindex;
+               devmap_val.bpf_prog.fd = egress_prog_fd;
+               ret = bpf_map_update_elem(forward_map_fd, &ifindex, &devmap_val, 0);
+               if (ret) {
+                       perror("bpf_map_update_elem forward_map");
+                       goto err_out;
+               }
+       }
+
+       poll_stats(2);
+
+       return 0;
+
+err_out:
+       return 1;
+}
index 706475e..495e098 100644 (file)
@@ -103,7 +103,8 @@ static void usage(const char *prog)
        fprintf(stderr,
                "%s: %s [OPTS] <ifname|ifindex>\n\n"
                "OPTS:\n"
-               "    -F    force loading prog\n",
+               "    -F    force loading prog\n"
+               "    -S    use skb-mode\n",
                __func__, prog);
 }
 
index d16d289..d73232b 100644 (file)
@@ -136,7 +136,7 @@ endif
 
 BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
 
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o) $(OUTPUT)disasm.o
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)                           \
@@ -180,6 +180,9 @@ endif
 
 CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
 
+$(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
+       $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $<
+
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
        $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
index 13b0aa7..1d71ff8 100644 (file)
@@ -713,6 +713,7 @@ static int do_skeleton(int argc, char **argv)
                #ifndef %2$s                                                \n\
                #define %2$s                                                \n\
                                                                            \n\
+               #include <errno.h>                                          \n\
                #include <stdlib.h>                                         \n\
                #include <bpf/libbpf.h>                                     \n\
                                                                            \n\
@@ -793,18 +794,23 @@ static int do_skeleton(int argc, char **argv)
                %1$s__open_opts(const struct bpf_object_open_opts *opts)    \n\
                {                                                           \n\
                        struct %1$s *obj;                                   \n\
+                       int err;                                            \n\
                                                                            \n\
                        obj = (struct %1$s *)calloc(1, sizeof(*obj));       \n\
-                       if (!obj)                                           \n\
+                       if (!obj) {                                         \n\
+                               errno = ENOMEM;                             \n\
                                return NULL;                                \n\
-                       if (%1$s__create_skeleton(obj))                     \n\
-                               goto err;                                   \n\
-                       if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\
-                               goto err;                                   \n\
+                       }                                                   \n\
+                                                                           \n\
+                       err = %1$s__create_skeleton(obj);                   \n\
+                       err = err ?: bpf_object__open_skeleton(obj->skeleton, opts);\n\
+                       if (err)                                            \n\
+                               goto err_out;                               \n\
                                                                            \n\
                        return obj;                                         \n\
-               err:                                                        \n\
+               err_out:                                                    \n\
                        %1$s__destroy(obj);                                 \n\
+                       errno = -err;                                       \n\
                        return NULL;                                        \n\
                }                                                           \n\
                                                                            \n\
@@ -824,12 +830,15 @@ static int do_skeleton(int argc, char **argv)
                %1$s__open_and_load(void)                                   \n\
                {                                                           \n\
                        struct %1$s *obj;                                   \n\
+                       int err;                                            \n\
                                                                            \n\
                        obj = %1$s__open();                                 \n\
                        if (!obj)                                           \n\
                                return NULL;                                \n\
-                       if (%1$s__load(obj)) {                              \n\
+                       err = %1$s__load(obj);                              \n\
+                       if (err) {                                          \n\
                                %1$s__destroy(obj);                         \n\
+                               errno = -err;                               \n\
                                return NULL;                                \n\
                        }                                                   \n\
                        return obj;                                         \n\
@@ -860,7 +869,7 @@ static int do_skeleton(int argc, char **argv)
                                                                            \n\
                        s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
                        if (!s)                                             \n\
-                               return -1;                                  \n\
+                               goto err;                                   \n\
                        obj->skeleton = s;                                  \n\
                                                                            \n\
                        s->sz = sizeof(*s);                                 \n\
@@ -949,7 +958,7 @@ static int do_skeleton(int argc, char **argv)
                        return 0;                                           \n\
                err:                                                        \n\
                        bpf_object__destroy_skeleton(s);                    \n\
-                       return -1;                                          \n\
+                       return -ENOMEM;                                     \n\
                }                                                           \n\
                                                                            \n\
                #endif /* %s */                                             \n\
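
With this change the generated open/load helpers report the cause of failure through errno instead of only returning NULL. A calling sketch, with "myprog" standing in for a hypothetical skeleton name:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include "myprog.skel.h"	/* hypothetical generated skeleton */

int main(void)
{
	struct myprog *skel;

	skel = myprog__open_and_load();
	if (!skel) {
		/* the skeleton now stores -err in errno before returning NULL */
		fprintf(stderr, "open/load failed: %s\n", strerror(errno));
		return 1;
	}
	myprog__destroy(skel);
	return 0;
}
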
index 7f2817d..3ddfd48 100644 (file)
@@ -341,8 +341,10 @@ static int do_batch(int argc, char **argv)
                n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
                if (!n_argc)
                        continue;
-               if (n_argc < 0)
+               if (n_argc < 0) {
+                       err = n_argc;
                        goto err_close;
+               }
 
                if (json_output) {
                        jsonw_start_object(json_wtr);
index 418b9b8..bf9252c 100644 (file)
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
  *             Look up an element with the given *key* in the map referred to
  *             by the file descriptor *fd*, and if found, delete the element.
  *
+ *             For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ *             types, the *flags* argument needs to be set to 0, but for other
+ *             map types, it may be specified as:
+ *
+ *             **BPF_F_LOCK**
+ *                     Look up and delete the value of a spin-locked map
+ *                     without returning the lock. This must be specified if
+ *                     the elements contain a spinlock.
+ *
  *             The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
  *             implement this command as a "pop" operation, deleting the top
  *             element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
  *             This command is only valid for the following map types:
  *             * **BPF_MAP_TYPE_QUEUE**
  *             * **BPF_MAP_TYPE_STACK**
+ *             * **BPF_MAP_TYPE_HASH**
+ *             * **BPF_MAP_TYPE_PERCPU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
  *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
@@ -981,6 +994,8 @@ enum bpf_attach_type {
        BPF_SK_LOOKUP,
        BPF_XDP,
        BPF_SK_SKB_VERDICT,
+       BPF_SK_REUSEPORT_SELECT,
+       BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -2542,8 +2557,12 @@ union bpf_attr {
  *             The lower two bits of *flags* are used as the return code if
  *             the map lookup fails. This is so that the return value can be
  *             one of the XDP program return codes up to **XDP_TX**, as chosen
- *             by the caller. Any higher bits in the *flags* argument must be
- *             unset.
+ *             by the caller. The higher bits of *flags* can be set to
+ *             **BPF_F_BROADCAST** or **BPF_F_EXCLUDE_INGRESS** as defined below.
+ *
+ *             With **BPF_F_BROADCAST** the packet will be broadcast to all
+ *             interfaces in the map; with **BPF_F_EXCLUDE_INGRESS** the
+ *             ingress interface will be excluded from the broadcast.
  *
  *             See also **bpf_redirect**\ (), which only supports redirecting
  *             to an ifindex, but doesn't require a map to do so.
@@ -5109,6 +5128,12 @@ enum {
        BPF_F_BPRM_SECUREEXEC   = (1ULL << 0),
 };
 
+/* Flags for bpf_redirect_map helper */
+enum {
+       BPF_F_BROADCAST         = (1ULL << 3),
+       BPF_F_EXCLUDE_INGRESS   = (1ULL << 4),
+};
+
 #define __bpf_md_ptr(type, name)       \
 union {                                        \
        type name;                      \
@@ -5393,6 +5418,20 @@ struct sk_reuseport_md {
        __u32 ip_protocol;      /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
        __u32 bind_inany;       /* Is sock bound to an INANY address? */
        __u32 hash;             /* A hash of the packet 4 tuples */
+       /* When reuse->migrating_sk is NULL, it is selecting a sk for the
+        * new incoming connection request (e.g. selecting a listen sk for
+        * the received SYN in the TCP case).  reuse->sk is one of the
+        * sockets in the reuseport group. The bpf prog can use reuse->sk
+        * to learn the local listening ip/port without looking into the skb.
+        *
+        * When reuse->migrating_sk is not NULL, reuse->sk is closed and
+        * reuse->migrating_sk is the socket that needs to be migrated
+        * to another listening socket.  migrating_sk could be a fullsock
+        * sk that is fully established or a reqsk that is in the middle
+        * of the 3-way handshake.
+        */
+       __bpf_md_ptr(struct bpf_sock *, sk);
+       __bpf_md_ptr(struct bpf_sock *, migrating_sk);
 };
 
 #define BPF_TAG_SIZE   8
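
A minimal BPF program sketch for the new attach types, assuming the "sk_reuseport/migrate" section convention libbpf maps to BPF_SK_REUSEPORT_SELECT_OR_MIGRATE (map name and the slot-0 policy are illustrative):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} migrate_map SEC(".maps");

SEC("sk_reuseport/migrate")
int select_or_migrate(struct sk_reuseport_md *reuse_md)
{
	__u32 key = 0;

	/* migrating_sk == NULL: ordinary SYN, keep the default selection */
	if (!reuse_md->migrating_sk)
		return SK_PASS;

	/* migrating_sk != NULL: the listener (reuse_md->sk) is closing;
	 * pick the replacement listener stored at slot 0.
	 */
	if (bpf_sk_select_reuseport(reuse_md, &migrate_map, &key, 0))
		return SK_DROP;

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";
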
index e43e189..ec14aa7 100644 (file)
@@ -223,18 +223,14 @@ install_lib: all_cmd
                $(call do_install_mkdir,$(libdir_SQ)); \
                cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
 
+INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
+                 bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h             \
+                 bpf_endian.h bpf_core_read.h skel_internal.h
+
 install_headers: $(BPF_HELPER_DEFS)
-       $(call QUIET_INSTALL, headers) \
-               $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
-               $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
-               $(call do_install,btf.h,$(prefix)/include/bpf,644); \
-               $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
-               $(call do_install,xsk.h,$(prefix)/include/bpf,644); \
-               $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
-               $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
-               $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
-               $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
-               $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
+       $(call QUIET_INSTALL, headers)                                       \
+               $(foreach hdr,$(INSTALL_HEADERS),                            \
+                       $(call do_install,$(hdr),$(prefix)/include/bpf,644);)
 
 install_pkgconfig: $(PC_FILE)
        $(call QUIET_INSTALL, $(PC_FILE)) \
index bba48ff..86dcac4 100644 (file)
@@ -80,6 +80,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
 int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, '\0', sizeof(attr));
 
@@ -102,7 +103,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
        else
                attr.inner_map_fd = create_attr->inner_map_fd;
 
-       return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
@@ -160,6 +162,7 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
                               __u32 map_flags, int node)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, '\0', sizeof(attr));
 
@@ -178,7 +181,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
                attr.numa_node = node;
        }
 
-       return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
@@ -222,10 +226,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
        int fd;
 
        if (!load_attr->log_buf != !load_attr->log_buf_sz)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.prog_type = load_attr->prog_type;
@@ -281,8 +285,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
                                                        load_attr->func_info_cnt,
                                                        load_attr->func_info_rec_size,
                                                        attr.func_info_rec_size);
-                       if (!finfo)
+                       if (!finfo) {
+                               errno = E2BIG;
                                goto done;
+                       }
 
                        attr.func_info = ptr_to_u64(finfo);
                        attr.func_info_rec_size = load_attr->func_info_rec_size;
@@ -293,8 +299,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
                                                        load_attr->line_info_cnt,
                                                        load_attr->line_info_rec_size,
                                                        attr.line_info_rec_size);
-                       if (!linfo)
+                       if (!linfo) {
+                               errno = E2BIG;
                                goto done;
+                       }
 
                        attr.line_info = ptr_to_u64(linfo);
                        attr.line_info_rec_size = load_attr->line_info_rec_size;
@@ -318,9 +326,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 
        fd = sys_bpf_prog_load(&attr, sizeof(attr));
 done:
+       /* free() doesn't affect errno, so we don't need to restore it */
        free(finfo);
        free(linfo);
-       return fd;
+       return libbpf_err_errno(fd);
 }
 
 int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
@@ -329,7 +338,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
        struct bpf_prog_load_params p = {};
 
        if (!load_attr || !log_buf != !log_buf_sz)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        p.prog_type = load_attr->prog_type;
        p.expected_attach_type = load_attr->expected_attach_type;
@@ -391,6 +400,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
                       int log_level)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.prog_type = type;
@@ -404,13 +414,15 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
        attr.kern_version = kern_version;
        attr.prog_flags = prog_flags;
 
-       return sys_bpf_prog_load(&attr, sizeof(attr));
+       fd = sys_bpf_prog_load(&attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_map_update_elem(int fd, const void *key, const void *value,
                        __u64 flags)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
@@ -418,24 +430,28 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
        attr.value = ptr_to_u64(value);
        attr.flags = flags;
 
-       return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.value = ptr_to_u64(value);
 
-       return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
@@ -443,17 +459,33 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
        attr.value = ptr_to_u64(value);
        attr.flags = flags;
 
-       return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 {
        union bpf_attr attr;
+       int ret;
+
+       memset(&attr, 0, sizeof(attr));
+       attr.map_fd = fd;
+       attr.key = ptr_to_u64(key);
+       attr.value = ptr_to_u64(value);
+
+       ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
+}
+
+int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+       union bpf_attr attr;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.value = ptr_to_u64(value);
+       attr.flags = flags;
 
        return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
 }
@@ -461,34 +493,40 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 int bpf_map_delete_elem(int fd, const void *key)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
 
-       return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_get_next_key(int fd, const void *key, void *next_key)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.next_key = ptr_to_u64(next_key);
 
-       return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_freeze(int fd)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
 
-       return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
@@ -500,7 +538,7 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
        int ret;
 
        if (!OPTS_VALID(opts, bpf_map_batch_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.batch.map_fd = fd;
@@ -515,7 +553,7 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
        ret = sys_bpf(cmd, &attr, sizeof(attr));
        *count = attr.batch.count;
 
-       return ret;
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
@@ -552,22 +590,26 @@ int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
 int bpf_obj_pin(int fd, const char *pathname)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.pathname = ptr_to_u64((void *)pathname);
        attr.bpf_fd = fd;
 
-       return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_obj_get(const char *pathname)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.pathname = ptr_to_u64((void *)pathname);
 
-       return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
@@ -585,9 +627,10 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
                          const struct bpf_prog_attach_opts *opts)
 {
        union bpf_attr attr;
+       int ret;
 
        if (!OPTS_VALID(opts, bpf_prog_attach_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.target_fd     = target_fd;
@@ -596,30 +639,35 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
        attr.attach_flags  = OPTS_GET(opts, flags, 0);
        attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
 
-       return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.target_fd   = target_fd;
        attr.attach_type = type;
 
-       return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.target_fd   = target_fd;
        attr.attach_bpf_fd = prog_fd;
        attr.attach_type = type;
 
-       return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_link_create(int prog_fd, int target_fd,
@@ -628,15 +676,16 @@ int bpf_link_create(int prog_fd, int target_fd,
 {
        __u32 target_btf_id, iter_info_len;
        union bpf_attr attr;
+       int fd;
 
        if (!OPTS_VALID(opts, bpf_link_create_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        iter_info_len = OPTS_GET(opts, iter_info_len, 0);
        target_btf_id = OPTS_GET(opts, target_btf_id, 0);
 
        if (iter_info_len && target_btf_id)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.link_create.prog_fd = prog_fd;
@@ -652,26 +701,30 @@ int bpf_link_create(int prog_fd, int target_fd,
                attr.link_create.target_btf_id = target_btf_id;
        }
 
-       return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_link_detach(int link_fd)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.link_detach.link_fd = link_fd;
 
-       return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_link_update(int link_fd, int new_prog_fd,
                    const struct bpf_link_update_opts *opts)
 {
        union bpf_attr attr;
+       int ret;
 
        if (!OPTS_VALID(opts, bpf_link_update_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.link_update.link_fd = link_fd;
@@ -679,17 +732,20 @@ int bpf_link_update(int link_fd, int new_prog_fd,
        attr.link_update.flags = OPTS_GET(opts, flags, 0);
        attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
 
-       return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_iter_create(int link_fd)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.iter_create.link_fd = link_fd;
 
-       return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
@@ -706,10 +762,12 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
        attr.query.prog_ids     = ptr_to_u64(prog_ids);
 
        ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+
        if (attach_flags)
                *attach_flags = attr.query.attach_flags;
        *prog_cnt = attr.query.prog_cnt;
-       return ret;
+
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
@@ -727,13 +785,15 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
        attr.test.repeat = repeat;
 
        ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
        if (size_out)
                *size_out = attr.test.data_size_out;
        if (retval)
                *retval = attr.test.retval;
        if (duration)
                *duration = attr.test.duration;
-       return ret;
+
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
@@ -742,7 +802,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
        int ret;
 
        if (!test_attr->data_out && test_attr->data_size_out > 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd = test_attr->prog_fd;
@@ -757,11 +817,13 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
        attr.test.repeat = test_attr->repeat;
 
        ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
        test_attr->data_size_out = attr.test.data_size_out;
        test_attr->ctx_size_out = attr.test.ctx_size_out;
        test_attr->retval = attr.test.retval;
        test_attr->duration = attr.test.duration;
-       return ret;
+
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
@@ -770,7 +832,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
        int ret;
 
        if (!OPTS_VALID(opts, bpf_test_run_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd = prog_fd;
@@ -788,11 +850,13 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
        attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
 
        ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
        OPTS_SET(opts, data_size_out, attr.test.data_size_out);
        OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
        OPTS_SET(opts, duration, attr.test.duration);
        OPTS_SET(opts, retval, attr.test.retval);
-       return ret;
+
+       return libbpf_err_errno(ret);
 }
 
 static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
@@ -807,7 +871,7 @@ static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
        if (!err)
                *next_id = attr.next_id;
 
-       return err;
+       return libbpf_err_errno(err);
 }
 
 int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
@@ -833,41 +897,49 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
 int bpf_prog_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.prog_id = id;
 
-       return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_map_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_id = id;
 
-       return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_btf_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.btf_id = id;
 
-       return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_link_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.link_id = id;
 
-       return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
@@ -881,21 +953,24 @@ int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
        attr.info.info = ptr_to_u64(info);
 
        err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+
        if (!err)
                *info_len = attr.info.info_len;
 
-       return err;
+       return libbpf_err_errno(err);
 }
 
 int bpf_raw_tracepoint_open(const char *name, int prog_fd)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.raw_tracepoint.name = ptr_to_u64(name);
        attr.raw_tracepoint.prog_fd = prog_fd;
 
-       return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
@@ -915,12 +990,13 @@ retry:
        }
 
        fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
-       if (fd == -1 && !do_log && log_buf && log_buf_size) {
+
+       if (fd < 0 && !do_log && log_buf && log_buf_size) {
                do_log = true;
                goto retry;
        }
 
-       return fd;
+       return libbpf_err_errno(fd);
 }
 
 int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
@@ -937,37 +1013,42 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
        attr.task_fd_query.buf_len = *buf_len;
 
        err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+
        *buf_len = attr.task_fd_query.buf_len;
        *prog_id = attr.task_fd_query.prog_id;
        *fd_type = attr.task_fd_query.fd_type;
        *probe_offset = attr.task_fd_query.probe_offset;
        *probe_addr = attr.task_fd_query.probe_addr;
 
-       return err;
+       return libbpf_err_errno(err);
 }
 
 int bpf_enable_stats(enum bpf_stats_type type)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.enable_stats.type = type;
 
-       return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_prog_bind_map(int prog_fd, int map_fd,
                      const struct bpf_prog_bind_opts *opts)
 {
        union bpf_attr attr;
+       int ret;
 
        if (!OPTS_VALID(opts, bpf_prog_bind_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.prog_bind_map.prog_fd = prog_fd;
        attr.prog_bind_map.map_fd = map_fd;
        attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
 
-       return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
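
After this conversion every wrapper both returns a negative result on failure and leaves the error code in errno, so -1/errno callers and direct-error callers keep working. A calling sketch:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <bpf/bpf.h>

static int get_prog_fd(__u32 id)
{
	int fd = bpf_prog_get_fd_by_id(id);

	if (fd < 0)	/* the error code is also available in errno */
		fprintf(stderr, "prog fd by id %u: %s\n", id, strerror(errno));
	return fd;
}
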
index 875dde2..4f758f8 100644 (file)
@@ -124,6 +124,8 @@ LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
                                         __u64 flags);
 LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
                                              void *value);
+LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key,
+                                                   void *value, __u64 flags);
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);
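
The new _flags variant complements the map types just added to BPF_MAP_LOOKUP_AND_DELETE_ELEM. A userspace sketch that atomically pops one element from a plain BPF_MAP_TYPE_HASH (map fd and key are illustrative; *flags* stays 0 unless the value embeds a bpf_spin_lock, in which case BPF_F_LOCK is required):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <bpf/bpf.h>

/* assumes map_fd refers to a BPF_MAP_TYPE_HASH with __u32 key/value */
static int pop_entry(int map_fd, __u32 key)
{
	__u32 value;

	/* read and remove the element in one syscall */
	if (bpf_map_lookup_and_delete_elem_flags(map_fd, &key, &value, 0) < 0) {
		fprintf(stderr, "lookup_and_delete: %s\n", strerror(errno));
		return -1;
	}
	printf("key %u had value %u\n", key, value);
	return 0;
}
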
index 9720dc0..b9987c3 100644 (file)
@@ -158,4 +158,70 @@ enum libbpf_tristate {
 #define __kconfig __attribute__((section(".kconfig")))
 #define __ksym __attribute__((section(".ksyms")))
 
+#ifndef ___bpf_concat
+#define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
+#define ___bpf_narg(...) \
+       ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+       ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
+/*
+ * BPF_SEQ_PRINTF wraps the bpf_seq_printf helper, packing the
+ * to-be-printed values into an on-stack array.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...)                      \
+({                                                             \
+       static const char ___fmt[] = fmt;                       \
+       unsigned long long ___param[___bpf_narg(args)];         \
+                                                               \
+       _Pragma("GCC diagnostic push")                          \
+       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")  \
+       ___bpf_fill(___param, args);                            \
+       _Pragma("GCC diagnostic pop")                           \
+                                                               \
+       bpf_seq_printf(seq, ___fmt, sizeof(___fmt),             \
+                      ___param, sizeof(___param));             \
+})
+
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...)              \
+({                                                             \
+       static const char ___fmt[] = fmt;                       \
+       unsigned long long ___param[___bpf_narg(args)];         \
+                                                               \
+       _Pragma("GCC diagnostic push")                          \
+       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")  \
+       ___bpf_fill(___param, args);                            \
+       _Pragma("GCC diagnostic pop")                           \
+                                                               \
+       bpf_snprintf(out, out_size, ___fmt,                     \
+                    ___param, sizeof(___param));               \
+})
+
 #endif
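
With the macros relocated to bpf_helpers.h, any BPF program can use them, not just tracing ones. A tracepoint sketch (section name and message are illustrative):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tracepoint/syscalls/sys_enter_execve")
int log_execve(void *ctx)
{
	char msg[64];

	/* BPF_SNPRINTF packs the variadic args into a u64 array and
	 * invokes the bpf_snprintf() helper behind the scenes.
	 */
	BPF_SNPRINTF(msg, sizeof(msg), "execve by pid %d",
		     (int)(bpf_get_current_pid_tgid() >> 32));
	bpf_printk("%s", msg);
	return 0;
}

char _license[] SEC("license") = "GPL";
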
index 3ed1a27..5c50309 100644 (file)
@@ -106,7 +106,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
        nr_linfo = info->nr_line_info;
 
        if (!nr_linfo)
-               return NULL;
+               return errno = EINVAL, NULL;
 
        /*
         * The min size that bpf_prog_linfo has to access for
@@ -114,11 +114,11 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
         */
        if (info->line_info_rec_size <
            offsetof(struct bpf_line_info, file_name_off))
-               return NULL;
+               return errno = EINVAL, NULL;
 
        prog_linfo = calloc(1, sizeof(*prog_linfo));
        if (!prog_linfo)
-               return NULL;
+               return errno = ENOMEM, NULL;
 
        /* Copy xlated line_info */
        prog_linfo->nr_linfo = nr_linfo;
@@ -174,7 +174,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
 
 err_free:
        bpf_prog_linfo__free(prog_linfo);
-       return NULL;
+       return errno = EINVAL, NULL;
 }
 
 const struct bpf_line_info *
@@ -186,11 +186,11 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
        const __u64 *jited_linfo;
 
        if (func_idx >= prog_linfo->nr_jited_func)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx];
        if (nr_skip >= nr_linfo)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip;
        jited_rec_size = prog_linfo->jited_rec_size;
@@ -198,7 +198,7 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
                (start * jited_rec_size);
        jited_linfo = raw_jited_linfo;
        if (addr < *jited_linfo)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        nr_linfo -= nr_skip;
        rec_size = prog_linfo->rec_size;
@@ -225,13 +225,13 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
 
        nr_linfo = prog_linfo->nr_linfo;
        if (nr_skip >= nr_linfo)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        rec_size = prog_linfo->rec_size;
        raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size);
        linfo = raw_linfo;
        if (insn_off < linfo->insn_off)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        nr_linfo -= nr_skip;
        for (i = 0; i < nr_linfo; i++) {
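
Since the lookup functions now set errno on every NULL return, callers can tell a missing record (ENOENT) apart from invalid input (EINVAL). A usage sketch (find_line() is hypothetical; prog_linfo comes from bpf_prog_linfo__new()):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <bpf/libbpf.h>

    static void find_line(const struct bpf_prog_linfo *prog_linfo, __u32 insn_off)
    {
            const struct bpf_line_info *linfo;

            linfo = bpf_prog_linfo__lfind(prog_linfo, insn_off, 0);
            if (!linfo)
                    /* ENOENT: no matching record; EINVAL: bad query */
                    fprintf(stderr, "lfind: %s\n", strerror(errno));
    }
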
index 8c954eb..d6bfbe0 100644 (file)
        #define bpf_target_sparc
        #define bpf_target_defined
 #else
-       #undef bpf_target_defined
-#endif
 
 /* Fall back to what the compiler says */
-#ifndef bpf_target_defined
 #if defined(__x86_64__)
        #define bpf_target_x86
+       #define bpf_target_defined
 #elif defined(__s390__)
        #define bpf_target_s390
+       #define bpf_target_defined
 #elif defined(__arm__)
        #define bpf_target_arm
+       #define bpf_target_defined
 #elif defined(__aarch64__)
        #define bpf_target_arm64
+       #define bpf_target_defined
 #elif defined(__mips__)
        #define bpf_target_mips
+       #define bpf_target_defined
 #elif defined(__powerpc__)
        #define bpf_target_powerpc
+       #define bpf_target_defined
 #elif defined(__sparc__)
        #define bpf_target_sparc
+       #define bpf_target_defined
+#endif /* no compiler target */
+
 #endif
+
+#ifndef __BPF_TARGET_MISSING
+#define __BPF_TARGET_MISSING "GCC error \"Must specify a BPF target arch via __TARGET_ARCH_xxx\""
 #endif
 
 #if defined(bpf_target_x86)
@@ -287,7 +296,7 @@ struct pt_regs;
 #elif defined(bpf_target_sparc)
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)                ({ (ip) = PT_REGS_RET(ctx); })
 #define BPF_KRETPROBE_READ_RET_IP              BPF_KPROBE_READ_RET_IP
-#else
+#elif defined(bpf_target_defined)
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)                                            \
        ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
 #define BPF_KRETPROBE_READ_RET_IP(ip, ctx)                                 \
@@ -295,13 +304,48 @@ struct pt_regs;
                          (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
 #endif
 
+#if !defined(bpf_target_defined)
+
+#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#endif /* !defined(bpf_target_defined) */
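
These dummies turn a previously silent miscompilation into a hard failure: with no detectable target architecture, any expansion of the PT_REGS macros trips the _Pragma(GCC error ...) above. A sketch of a program that only builds once an arch is given explicitly (e.g. -D__TARGET_ARCH_x86); do_unlinkat's signature is taken from the kernel of this era:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    SEC("kprobe/do_unlinkat")
    int BPF_KPROBE(handle_unlinkat, int dfd, struct filename *name)
    {
            /* BPF_KPROBE expands to PT_REGS_PARM1()/PT_REGS_PARM2(); without
             * a target arch the build stops with:
             *   "Must specify a BPF target arch via __TARGET_ARCH_xxx"
             */
            return 0;
    }
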
+
+#ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
 #define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
 #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
 #define ___bpf_narg(...) \
        ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define ___bpf_empty(...) \
-       ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+#endif
 
 #define ___bpf_ctx_cast0() ctx
 #define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
@@ -413,56 +457,4 @@ typeof(name(0)) name(struct pt_regs *ctx)                              \
 }                                                                          \
 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
 
-#define ___bpf_fill0(arr, p, x) do {} while (0)
-#define ___bpf_fill1(arr, p, x) arr[p] = x
-#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
-#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
-#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
-#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
-#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
-#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
-#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
-#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
-#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
-#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
-#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
-#define ___bpf_fill(arr, args...) \
-       ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
-
-/*
- * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
- * in a structure.
- */
-#define BPF_SEQ_PRINTF(seq, fmt, args...)                      \
-({                                                             \
-       static const char ___fmt[] = fmt;                       \
-       unsigned long long ___param[___bpf_narg(args)];         \
-                                                               \
-       _Pragma("GCC diagnostic push")                          \
-       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")  \
-       ___bpf_fill(___param, args);                            \
-       _Pragma("GCC diagnostic pop")                           \
-                                                               \
-       bpf_seq_printf(seq, ___fmt, sizeof(___fmt),             \
-                      ___param, sizeof(___param));             \
-})
-
-/*
- * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
- * an array of u64.
- */
-#define BPF_SNPRINTF(out, out_size, fmt, args...)              \
-({                                                             \
-       static const char ___fmt[] = fmt;                       \
-       unsigned long long ___param[___bpf_narg(args)];         \
-                                                               \
-       _Pragma("GCC diagnostic push")                          \
-       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")  \
-       ___bpf_fill(___param, args);                            \
-       _Pragma("GCC diagnostic pop")                           \
-                                                               \
-       bpf_snprintf(out, out_size, ___fmt,                     \
-                    ___param, sizeof(___param));               \
-})
-
 #endif
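
The #ifndef guards added above let bpf_helpers.h and bpf_tracing.h be included in either order now that both need the argument-counting helpers (the unused ___bpf_empty() is dropped). How the counting works, as a compile-time sketch:

    /* ___bpf_narg(x, y, z) expands to
     *   ___bpf_nth(_, x, y, z, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
     * the three user args shift the numeric list so that ___bpf_nth()'s
     * 14th parameter, N, lands on the literal 3. */
    _Static_assert(___bpf_narg() == 0, "zero args");
    _Static_assert(___bpf_narg(x) == 1, "one arg");
    _Static_assert(___bpf_narg(x, y, z) == 3, "three args");
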
index d57e13a..b46760b 100644 (file)
@@ -443,7 +443,7 @@ struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
 {
        if (type_id >= btf->start_id + btf->nr_types)
-               return NULL;
+               return errno = EINVAL, NULL;
        return btf_type_by_id((struct btf *)btf, type_id);
 }
 
@@ -510,7 +510,7 @@ size_t btf__pointer_size(const struct btf *btf)
 int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
 {
        if (ptr_sz != 4 && ptr_sz != 8)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        btf->ptr_sz = ptr_sz;
        return 0;
 }
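
The integer-returning conversions in this file all funnel through the new libbpf_err() helper, which mirrors the error into errno before returning it, so return-code and errno-based callers agree. Roughly, per libbpf_internal.h in this series:

    static inline int libbpf_err(int err)
    {
            /* record the error for errno-based callers, then pass it through */
            if (err < 0)
                    errno = -err;
            return err;
    }
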
@@ -537,7 +537,7 @@ enum btf_endianness btf__endianness(const struct btf *btf)
 int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
 {
        if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
        if (!btf->swapped_endian) {
@@ -568,8 +568,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
        int i;
 
        t = btf__type_by_id(btf, type_id);
-       for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
-            i++) {
+       for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) {
                switch (btf_kind(t)) {
                case BTF_KIND_INT:
                case BTF_KIND_STRUCT:
@@ -592,12 +591,12 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
                case BTF_KIND_ARRAY:
                        array = btf_array(t);
                        if (nelems && array->nelems > UINT32_MAX / nelems)
-                               return -E2BIG;
+                               return libbpf_err(-E2BIG);
                        nelems *= array->nelems;
                        type_id = array->type;
                        break;
                default:
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                }
 
                t = btf__type_by_id(btf, type_id);
@@ -605,9 +604,9 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 
 done:
        if (size < 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (nelems && size > UINT32_MAX / nelems)
-               return -E2BIG;
+               return libbpf_err(-E2BIG);
 
        return nelems * size;
 }
@@ -640,7 +639,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
                for (i = 0; i < vlen; i++, m++) {
                        align = btf__align_of(btf, m->type);
                        if (align <= 0)
-                               return align;
+                               return libbpf_err(align);
                        max_align = max(max_align, align);
                }
 
@@ -648,7 +647,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
        }
        default:
                pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
-               return 0;
+               return errno = EINVAL, 0;
        }
 }
 
@@ -667,7 +666,7 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
        }
 
        if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        return type_id;
 }
@@ -687,7 +686,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
                        return i;
        }
 
-       return -ENOENT;
+       return libbpf_err(-ENOENT);
 }
 
 __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
@@ -709,7 +708,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
                        return i;
        }
 
-       return -ENOENT;
+       return libbpf_err(-ENOENT);
 }
 
 static bool btf_is_modifiable(const struct btf *btf)
@@ -785,12 +784,12 @@ static struct btf *btf_new_empty(struct btf *base_btf)
 
 struct btf *btf__new_empty(void)
 {
-       return btf_new_empty(NULL);
+       return libbpf_ptr(btf_new_empty(NULL));
 }
 
 struct btf *btf__new_empty_split(struct btf *base_btf)
 {
-       return btf_new_empty(base_btf);
+       return libbpf_ptr(btf_new_empty(base_btf));
 }
 
 static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
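
Pointer-returning APIs use two companions: libbpf_err_ptr() for a raw error code and libbpf_ptr() for a possibly ERR_PTR()-encoded value. Sketched from the same series (behavior depends on the strict mode introduced in libbpf.c further below):

    static inline void *libbpf_err_ptr(int err)
    {
            /* set errno on error, this doesn't break anything */
            errno = -err;

            if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
                    return NULL;            /* libbpf 1.0 behavior */
            return ERR_PTR(err);            /* legacy: encode err as ptr */
    }

    static inline void *libbpf_ptr(void *ret)
    {
            if (IS_ERR(ret))
                    errno = -PTR_ERR(ret);

            if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
                    return IS_ERR(ret) ? NULL : ret;
            return ret;
    }
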
@@ -846,7 +845,7 @@ done:
 
 struct btf *btf__new(const void *data, __u32 size)
 {
-       return btf_new(data, size, NULL);
+       return libbpf_ptr(btf_new(data, size, NULL));
 }
 
 static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
@@ -937,7 +936,8 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
                goto done;
        }
        btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf);
-       if (IS_ERR(btf))
+       err = libbpf_get_error(btf);
+       if (err)
                goto done;
 
        switch (gelf_getclass(elf)) {
@@ -953,9 +953,9 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
        }
 
        if (btf_ext && btf_ext_data) {
-               *btf_ext = btf_ext__new(btf_ext_data->d_buf,
-                                       btf_ext_data->d_size);
-               if (IS_ERR(*btf_ext))
+               *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+               err = libbpf_get_error(*btf_ext);
+               if (err)
                        goto done;
        } else if (btf_ext) {
                *btf_ext = NULL;
@@ -965,30 +965,24 @@ done:
                elf_end(elf);
        close(fd);
 
-       if (err)
-               return ERR_PTR(err);
-       /*
-        * btf is always parsed before btf_ext, so no need to clean up
-        * btf_ext, if btf loading failed
-        */
-       if (IS_ERR(btf))
+       if (!err)
                return btf;
-       if (btf_ext && IS_ERR(*btf_ext)) {
-               btf__free(btf);
-               err = PTR_ERR(*btf_ext);
-               return ERR_PTR(err);
-       }
-       return btf;
+
+       if (btf_ext)
+               btf_ext__free(*btf_ext);
+       btf__free(btf);
+
+       return ERR_PTR(err);
 }
 
 struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
 {
-       return btf_parse_elf(path, NULL, btf_ext);
+       return libbpf_ptr(btf_parse_elf(path, NULL, btf_ext));
 }
 
 struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf)
 {
-       return btf_parse_elf(path, base_btf, NULL);
+       return libbpf_ptr(btf_parse_elf(path, base_btf, NULL));
 }
 
 static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
@@ -1056,36 +1050,39 @@ err_out:
 
 struct btf *btf__parse_raw(const char *path)
 {
-       return btf_parse_raw(path, NULL);
+       return libbpf_ptr(btf_parse_raw(path, NULL));
 }
 
 struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
 {
-       return btf_parse_raw(path, base_btf);
+       return libbpf_ptr(btf_parse_raw(path, base_btf));
 }
 
 static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
 {
        struct btf *btf;
+       int err;
 
        if (btf_ext)
                *btf_ext = NULL;
 
        btf = btf_parse_raw(path, base_btf);
-       if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
+       err = libbpf_get_error(btf);
+       if (!err)
                return btf;
-
+       if (err != -EPROTO)
+               return ERR_PTR(err);
        return btf_parse_elf(path, base_btf, btf_ext);
 }
 
 struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
 {
-       return btf_parse(path, NULL, btf_ext);
+       return libbpf_ptr(btf_parse(path, NULL, btf_ext));
 }
 
 struct btf *btf__parse_split(const char *path, struct btf *base_btf)
 {
-       return btf_parse(path, base_btf, NULL);
+       return libbpf_ptr(btf_parse(path, base_btf, NULL));
 }
 
 static int compare_vsi_off(const void *_a, const void *_b)
@@ -1178,7 +1175,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
                }
        }
 
-       return err;
+       return libbpf_err(err);
 }
 
 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
@@ -1191,13 +1188,13 @@ int btf__load(struct btf *btf)
        int err = 0;
 
        if (btf->fd >= 0)
-               return -EEXIST;
+               return libbpf_err(-EEXIST);
 
 retry_load:
        if (log_buf_size) {
                log_buf = malloc(log_buf_size);
                if (!log_buf)
-                       return -ENOMEM;
+                       return libbpf_err(-ENOMEM);
 
                *log_buf = 0;
        }
@@ -1229,7 +1226,7 @@ retry_load:
 
 done:
        free(log_buf);
-       return err;
+       return libbpf_err(err);
 }
 
 int btf__fd(const struct btf *btf)
@@ -1305,7 +1302,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
 
        data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
        if (!data)
-               return NULL;
+               return errno = ENOMEM, NULL;
 
        btf->raw_size = data_sz;
        if (btf->swapped_endian)
@@ -1323,7 +1320,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
        else if (offset - btf->start_str_off < btf->hdr->str_len)
                return btf_strs_data(btf) + (offset - btf->start_str_off);
        else
-               return NULL;
+               return errno = EINVAL, NULL;
 }
 
 const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
@@ -1388,17 +1385,20 @@ exit_free:
 int btf__get_from_id(__u32 id, struct btf **btf)
 {
        struct btf *res;
-       int btf_fd;
+       int err, btf_fd;
 
        *btf = NULL;
        btf_fd = bpf_btf_get_fd_by_id(id);
        if (btf_fd < 0)
-               return -errno;
+               return libbpf_err(-errno);
 
        res = btf_get_from_fd(btf_fd, NULL);
+       err = libbpf_get_error(res);
+
        close(btf_fd);
-       if (IS_ERR(res))
-               return PTR_ERR(res);
+
+       if (err)
+               return libbpf_err(err);
 
        *btf = res;
        return 0;
@@ -1415,31 +1415,30 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
        __s64 key_size, value_size;
        __s32 container_id;
 
-       if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
-           max_name) {
+       if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) {
                pr_warn("map:%s length of '____btf_map_%s' is too long\n",
                        map_name, map_name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        container_id = btf__find_by_name(btf, container_name);
        if (container_id < 0) {
                pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
                         map_name, container_name);
-               return container_id;
+               return libbpf_err(container_id);
        }
 
        container_type = btf__type_by_id(btf, container_id);
        if (!container_type) {
                pr_warn("map:%s cannot find BTF type for container_id:%u\n",
                        map_name, container_id);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
                pr_warn("map:%s container_name:%s is an invalid container struct\n",
                        map_name, container_name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        key = btf_members(container_type);
@@ -1448,25 +1447,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
        key_size = btf__resolve_size(btf, key->type);
        if (key_size < 0) {
                pr_warn("map:%s invalid BTF key_type_size\n", map_name);
-               return key_size;
+               return libbpf_err(key_size);
        }
 
        if (expected_key_size != key_size) {
                pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
                        map_name, (__u32)key_size, expected_key_size);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        value_size = btf__resolve_size(btf, value->type);
        if (value_size < 0) {
                pr_warn("map:%s invalid BTF value_type_size\n", map_name);
-               return value_size;
+               return libbpf_err(value_size);
        }
 
        if (expected_value_size != value_size) {
                pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
                        map_name, (__u32)value_size, expected_value_size);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        *key_type_id = key->type;
@@ -1563,11 +1562,11 @@ int btf__find_str(struct btf *btf, const char *s)
 
        /* BTF needs to be in a modifiable state to build string lookup index */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        off = strset__find_str(btf->strs_set, s);
        if (off < 0)
-               return off;
+               return libbpf_err(off);
 
        return btf->start_str_off + off;
 }
@@ -1588,11 +1587,11 @@ int btf__add_str(struct btf *btf, const char *s)
        }
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        off = strset__add_str(btf->strs_set, s);
        if (off < 0)
-               return off;
+               return libbpf_err(off);
 
        btf->hdr->str_len = strset__data_size(btf->strs_set);
 
@@ -1616,7 +1615,7 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 
        err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        btf->hdr->type_len += data_sz;
        btf->hdr->str_off += data_sz;
@@ -1653,21 +1652,21 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
 
        sz = btf_type_size(src_type);
        if (sz < 0)
-               return sz;
+               return libbpf_err(sz);
 
        /* deconstruct BTF, if necessary, and invalidate raw_data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        memcpy(t, src_type, sz);
 
        err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        return btf_commit_type(btf, sz);
 }
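
Since every btf__add_*() entry point now goes through libbpf_err(), BTF construction code can use either error convention. A usage sketch (add_basic_types() is hypothetical):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <bpf/btf.h>

    static int add_basic_types(struct btf *btf)
    {
            int id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);

            if (id < 0) {
                    /* the return value and errno now agree */
                    fprintf(stderr, "btf__add_int: %d (%s)\n",
                            id, strerror(errno));
                    return id;
            }
            return 0;
    }
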
@@ -1688,21 +1687,21 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        /* byte_sz must be power of 2 */
        if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* deconstruct BTF, if necessary, and invalidate raw_data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type) + sizeof(int);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        /* if something goes wrong later, we might end up with an extra string,
         * but that shouldn't be a problem, because BTF can't be constructed
@@ -1736,20 +1735,20 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* byte_sz must be one of the explicitly allowed values */
        if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
            byte_sz != 16)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        name_off = btf__add_str(btf, name);
        if (name_off < 0)
@@ -1780,15 +1779,15 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
        int sz, name_off = 0;
 
        if (validate_type_id(ref_type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -1831,15 +1830,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
        int sz;
 
        if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type) + sizeof(struct btf_array);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        t->name_off = 0;
        t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
@@ -1860,12 +1859,12 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
        int sz, name_off = 0;
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -1943,30 +1942,30 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 
        /* last type should be union/struct */
        if (btf->nr_types == 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        t = btf_last_type(btf);
        if (!btf_is_composite(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (validate_type_id(type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        /* best-effort bit field offset/size enforcement */
        is_bitfield = bit_size || (bit_offset % 8 != 0);
        if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* only offset 0 is allowed for unions */
        if (btf_is_union(t) && bit_offset)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* decompose and invalidate raw data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_member);
        m = btf_add_type_mem(btf, sz);
        if (!m)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -2008,15 +2007,15 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
 
        /* byte_sz must be power of 2 */
        if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -2048,25 +2047,25 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 
        /* last type should be BTF_KIND_ENUM */
        if (btf->nr_types == 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        t = btf_last_type(btf);
        if (!btf_is_enum(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (value < INT_MIN || value > UINT_MAX)
-               return -E2BIG;
+               return libbpf_err(-E2BIG);
 
        /* decompose and invalidate raw data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_enum);
        v = btf_add_type_mem(btf, sz);
        if (!v)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        name_off = btf__add_str(btf, name);
        if (name_off < 0)
@@ -2096,7 +2095,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
 {
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        switch (fwd_kind) {
        case BTF_FWD_STRUCT:
@@ -2117,7 +2116,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
                 */
                return btf__add_enum(btf, name, sizeof(int));
        default:
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 }
 
@@ -2132,7 +2131,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
 int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
 {
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
 }
@@ -2187,10 +2186,10 @@ int btf__add_func(struct btf *btf, const char *name,
        int id;
 
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
            linkage != BTF_FUNC_EXTERN)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
        if (id > 0) {
@@ -2198,7 +2197,7 @@ int btf__add_func(struct btf *btf, const char *name,
 
                t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
        }
-       return id;
+       return libbpf_err(id);
 }
 
 /*
@@ -2219,15 +2218,15 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id)
        int sz;
 
        if (validate_type_id(ret_type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        /* start out with vlen=0; this will be adjusted when adding function
         * parameters, if necessary
@@ -2254,23 +2253,23 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
        int sz, name_off = 0;
 
        if (validate_type_id(type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* last type should be BTF_KIND_FUNC_PROTO */
        if (btf->nr_types == 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        t = btf_last_type(btf);
        if (!btf_is_func_proto(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* decompose and invalidate raw data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_param);
        p = btf_add_type_mem(btf, sz);
        if (!p)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -2308,21 +2307,21 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
            linkage != BTF_VAR_GLOBAL_EXTERN)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (validate_type_id(type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* deconstruct BTF, if necessary, and invalidate raw_data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type) + sizeof(struct btf_var);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        name_off = btf__add_str(btf, name);
        if (name_off < 0)
@@ -2357,15 +2356,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        name_off = btf__add_str(btf, name);
        if (name_off < 0)
@@ -2397,22 +2396,22 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 
        /* last type should be BTF_KIND_DATASEC */
        if (btf->nr_types == 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        t = btf_last_type(btf);
        if (!btf_is_datasec(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (validate_type_id(var_type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* decompose and invalidate raw data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_var_secinfo);
        v = btf_add_type_mem(btf, sz);
        if (!v)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        v->type = var_type_id;
        v->offset = offset;
@@ -2614,11 +2613,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 
        err = btf_ext_parse_hdr(data, size);
        if (err)
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
 
        btf_ext = calloc(1, sizeof(struct btf_ext));
        if (!btf_ext)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
 
        btf_ext->data_size = size;
        btf_ext->data = malloc(size);
@@ -2628,9 +2627,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
        }
        memcpy(btf_ext->data, data, size);
 
-       if (btf_ext->hdr->hdr_len <
-           offsetofend(struct btf_ext_header, line_info_len))
+       if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
+               err = -EINVAL;
                goto done;
+       }
+
        err = btf_ext_setup_func_info(btf_ext);
        if (err)
                goto done;
@@ -2639,8 +2640,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
        if (err)
                goto done;
 
-       if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+       if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) {
+               err = -EINVAL;
                goto done;
+       }
+
        err = btf_ext_setup_core_relos(btf_ext);
        if (err)
                goto done;
@@ -2648,7 +2652,7 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 done:
        if (err) {
                btf_ext__free(btf_ext);
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
        }
 
        return btf_ext;
@@ -2687,7 +2691,7 @@ static int btf_ext_reloc_info(const struct btf *btf,
                existing_len = (*cnt) * record_size;
                data = realloc(*info, existing_len + records_len);
                if (!data)
-                       return -ENOMEM;
+                       return libbpf_err(-ENOMEM);
 
                memcpy(data + existing_len, sinfo->data, records_len);
                /* adjust insn_off only, the rest data will be passed
@@ -2697,15 +2701,14 @@ static int btf_ext_reloc_info(const struct btf *btf,
                        __u32 *insn_off;
 
                        insn_off = data + existing_len + (i * record_size);
-                       *insn_off = *insn_off / sizeof(struct bpf_insn) +
-                               insns_cnt;
+                       *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt;
                }
                *info = data;
                *cnt += sinfo->num_info;
                return 0;
        }
 
-       return -ENOENT;
+       return libbpf_err(-ENOENT);
 }
 
 int btf_ext__reloc_func_info(const struct btf *btf,
@@ -2894,11 +2897,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 
        if (IS_ERR(d)) {
                pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        err = btf_dedup_prep(d);
        if (err) {
@@ -2938,7 +2941,7 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 
 done:
        btf_dedup_free(d);
-       return err;
+       return libbpf_err(err);
 }
 
 #define BTF_UNPROCESSED_ID ((__u32)-1)
@@ -4411,7 +4414,7 @@ struct btf *libbpf_find_kernel_btf(void)
        char path[PATH_MAX + 1];
        struct utsname buf;
        struct btf *btf;
-       int i;
+       int i, err;
 
        uname(&buf);
 
@@ -4425,17 +4428,16 @@ struct btf *libbpf_find_kernel_btf(void)
                        btf = btf__parse_raw(path);
                else
                        btf = btf__parse_elf(path, NULL);
-
-               pr_debug("loading kernel BTF '%s': %ld\n",
-                        path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
-               if (IS_ERR(btf))
+               err = libbpf_get_error(btf);
+               pr_debug("loading kernel BTF '%s': %d\n", path, err);
+               if (err)
                        continue;
 
                return btf;
        }
 
        pr_warn("failed to find valid kernel BTF\n");
-       return ERR_PTR(-ESRCH);
+       return libbpf_err_ptr(-ESRCH);
 }
 
 int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
index 5e2809d..5dc6b51 100644 (file)
@@ -128,7 +128,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
 
        d = calloc(1, sizeof(struct btf_dump));
        if (!d)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
 
        d->btf = btf;
        d->btf_ext = btf_ext;
@@ -156,7 +156,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
        return d;
 err:
        btf_dump__free(d);
-       return ERR_PTR(err);
+       return libbpf_err_ptr(err);
 }
 
 static int btf_dump_resize(struct btf_dump *d)
@@ -236,16 +236,16 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
        int err, i;
 
        if (id > btf__get_nr_types(d->btf))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = btf_dump_resize(d);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        d->emit_queue_cnt = 0;
        err = btf_dump_order_type(d, id, false);
        if (err < 0)
-               return err;
+               return libbpf_err(err);
 
        for (i = 0; i < d->emit_queue_cnt; i++)
                btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
@@ -1075,11 +1075,11 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
        int lvl, err;
 
        if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = btf_dump_resize(d);
        if (err)
-               return -EINVAL;
+               return libbpf_err(err);
 
        fname = OPTS_GET(opts, field_name, "");
        lvl = OPTS_GET(opts, indent_level, 0);
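
btf_dump__new() still hands back an ERR_PTR in legacy mode and NULL plus errno under strict mode; libbpf_get_error() copes with both, so a caller written as below (a sketch against the four-argument btf_dump__new() of this series) works either way:

    #include <stdio.h>
    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    static struct btf_dump *new_dump(const struct btf *btf,
                                     btf_dump_printf_fn_t fn)
    {
            struct btf_dump *d = btf_dump__new(btf, NULL, NULL, fn);
            long err = libbpf_get_error(d); /* handles NULL+errno and ERR_PTR */

            if (err) {
                    fprintf(stderr, "btf_dump__new: %ld\n", err);
                    return NULL;
            }
            return d;
    }
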
index 69cd1a8..48c0ade 100644 (file)
@@ -151,6 +151,23 @@ static inline __u64 ptr_to_u64(const void *ptr)
        return (__u64) (unsigned long) ptr;
 }
 
+/* this goes away in libbpf 1.0 */
+enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
+
+int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
+{
+       /* __LIBBPF_STRICT_LAST is one past the last power-of-2 flag value,
+        * so subtract the +1 back out and compute (2*x - 1) to get the bit
+        * mask of all valid flags
+        */
+       if (mode != LIBBPF_STRICT_ALL
+           && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1)))
+               return errno = EINVAL, -EINVAL;
+
+       libbpf_mode = mode;
+       return 0;
+}
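
Applications opt in explicitly; with LIBBPF_STRICT_ALL (or just LIBBPF_STRICT_CLEAN_PTRS) set, pointer-returning APIs yield plain NULL with errno filled in and the IS_ERR()/PTR_ERR() dance disappears. A usage sketch (prog.bpf.o is a placeholder object file):

    #include <bpf/libbpf.h>

    int main(void)
    {
            struct bpf_object *obj;

            if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL))
                    return 1;

            obj = bpf_object__open("prog.bpf.o");
            if (!obj)       /* no IS_ERR()/PTR_ERR() needed anymore */
                    return 1;

            bpf_object__close(obj);
            return 0;
    }
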
+
 enum kern_feature_id {
        /* v4.14: kernel support for program & map names. */
        FEAT_PROG_NAME,
@@ -2448,10 +2465,8 @@ static int bpf_object__init_maps(struct bpf_object *obj,
        err = err ?: bpf_object__init_global_data_maps(obj);
        err = err ?: bpf_object__init_kconfig_map(obj);
        err = err ?: bpf_object__init_struct_ops_maps(obj);
-       if (err)
-               return err;
 
-       return 0;
+       return err;
 }
 
 static bool section_have_execinstr(struct bpf_object *obj, int idx)
@@ -2562,16 +2577,14 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 
        if (btf_data) {
                obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
-               if (IS_ERR(obj->btf)) {
-                       err = PTR_ERR(obj->btf);
+               err = libbpf_get_error(obj->btf);
+               if (err) {
                        obj->btf = NULL;
-                       pr_warn("Error loading ELF section %s: %d.\n",
-                               BTF_ELF_SEC, err);
+                       pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
                        goto out;
                }
                /* enforce 8-byte pointers for BPF-targeted BTFs */
                btf__set_pointer_size(obj->btf, 8);
-               err = 0;
        }
        if (btf_ext_data) {
                if (!obj->btf) {
@@ -2579,11 +2592,11 @@ static int bpf_object__init_btf(struct bpf_object *obj,
                                 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
                        goto out;
                }
-               obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
-                                           btf_ext_data->d_size);
-               if (IS_ERR(obj->btf_ext)) {
-                       pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
-                               BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+               obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+               err = libbpf_get_error(obj->btf_ext);
+               if (err) {
+                       pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
+                               BTF_EXT_ELF_SEC, err);
                        obj->btf_ext = NULL;
                        goto out;
                }
@@ -2667,8 +2680,8 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
                return 0;
 
        obj->btf_vmlinux = libbpf_find_kernel_btf();
-       if (IS_ERR(obj->btf_vmlinux)) {
-               err = PTR_ERR(obj->btf_vmlinux);
+       err = libbpf_get_error(obj->btf_vmlinux);
+       if (err) {
                pr_warn("Error loading vmlinux BTF: %d\n", err);
                obj->btf_vmlinux = NULL;
                return err;
@@ -2734,8 +2747,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
                /* clone BTF to sanitize a copy and leave the original intact */
                raw_data = btf__get_raw_data(obj->btf, &sz);
                kern_btf = btf__new(raw_data, sz);
-               if (IS_ERR(kern_btf))
-                       return PTR_ERR(kern_btf);
+               err = libbpf_get_error(kern_btf);
+               if (err)
+                       return err;
 
                /* enforce 8-byte pointers for BPF-targeted BTFs */
                btf__set_pointer_size(obj->btf, 8);
@@ -3509,7 +3523,7 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
                if (pos->sec_name && !strcmp(pos->sec_name, title))
                        return pos;
        }
-       return NULL;
+       return errno = ENOENT, NULL;
 }
 
 static bool prog_is_subprog(const struct bpf_object *obj,
@@ -3542,7 +3556,7 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,
                if (!strcmp(prog->name, name))
                        return prog;
        }
-       return NULL;
+       return errno = ENOENT, NULL;
 }
 
 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
@@ -3889,11 +3903,11 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 
        err = bpf_obj_get_info_by_fd(fd, &info, &len);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        new_name = strdup(info.name);
        if (!new_name)
-               return -errno;
+               return libbpf_err(-errno);
 
        new_fd = open("/", O_RDONLY | O_CLOEXEC);
        if (new_fd < 0) {
@@ -3931,7 +3945,7 @@ err_close_new_fd:
        close(new_fd);
 err_free_new_name:
        free(new_name);
-       return err;
+       return libbpf_err(err);
 }
 
 __u32 bpf_map__max_entries(const struct bpf_map *map)
@@ -3942,7 +3956,7 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 {
        if (!bpf_map_type__is_map_in_map(map->def.type))
-               return NULL;
+               return errno = EINVAL, NULL;
 
        return map->inner_map;
 }
@@ -3950,7 +3964,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.max_entries = max_entries;
        return 0;
 }
@@ -3958,7 +3972,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
 {
        if (!map || !max_entries)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        return bpf_map__set_max_entries(map, max_entries);
 }
@@ -3974,6 +3988,9 @@ bpf_object__probe_loading(struct bpf_object *obj)
        };
        int ret;
 
+       if (obj->gen_loader)
+               return 0;
+
        /* make sure basic loading works */
 
        memset(&attr, 0, sizeof(attr));
@@ -4565,7 +4582,7 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
                targ_map = map->init_slots[i];
                fd = bpf_map__fd(targ_map);
                if (obj->gen_loader) {
-                       pr_warn("// TODO map_update_elem: idx %ld key %d value==map_idx %ld\n",
+                       pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n",
                                map - obj->maps, i, targ_map - obj->maps);
                        return -ENOTSUP;
                } else {
@@ -5086,10 +5103,10 @@ static int load_module_btfs(struct bpf_object *obj)
                }
 
                btf = btf_get_from_fd(fd, obj->btf_vmlinux);
-               if (IS_ERR(btf)) {
-                       pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
-                               name, id, PTR_ERR(btf));
-                       err = PTR_ERR(btf);
+               err = libbpf_get_error(btf);
+               if (err) {
+                       pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
+                               name, id, err);
                        goto err_out;
                }
 
@@ -6189,7 +6206,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
                return -EINVAL;
 
        if (prog->obj->gen_loader) {
-               pr_warn("// TODO core_relo: prog %ld insn[%d] %s %s kind %d\n",
+               pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
                        prog - prog->obj->programs, relo->insn_off / 8,
                        local_name, spec_str, relo->kind);
                return -ENOTSUP;
@@ -6349,8 +6366,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 
        if (targ_btf_path) {
                obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
-               if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) {
-                       err = PTR_ERR(obj->btf_vmlinux_override);
+               err = libbpf_get_error(obj->btf_vmlinux_override);
+               if (err) {
                        pr_warn("failed to parse target BTF: %d\n", err);
                        return err;
                }
@@ -7407,7 +7424,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 
        if (prog->obj->loaded) {
                pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if ((prog->type == BPF_PROG_TYPE_TRACING ||
@@ -7417,7 +7434,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 
                err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
                if (err)
-                       return err;
+                       return libbpf_err(err);
 
                prog->attach_btf_obj_fd = btf_obj_fd;
                prog->attach_btf_id = btf_type_id;
@@ -7427,13 +7444,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
                if (prog->preprocessor) {
                        pr_warn("Internal error: can't load program '%s'\n",
                                prog->name);
-                       return -LIBBPF_ERRNO__INTERNAL;
+                       return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
                }
 
                prog->instances.fds = malloc(sizeof(int));
                if (!prog->instances.fds) {
                        pr_warn("Not enough memory for BPF fds\n");
-                       return -ENOMEM;
+                       return libbpf_err(-ENOMEM);
                }
                prog->instances.nr = 1;
                prog->instances.fds[0] = -1;
@@ -7492,7 +7509,7 @@ out:
                pr_warn("failed to load program '%s'\n", prog->name);
        zfree(&prog->insns);
        prog->insns_cnt = 0;
-       return err;
+       return libbpf_err(err);
 }
 
 static int
@@ -7625,7 +7642,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
 
 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
 {
-       return __bpf_object__open_xattr(attr, 0);
+       return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
 }
 
 struct bpf_object *bpf_object__open(const char *path)
@@ -7635,18 +7652,18 @@ struct bpf_object *bpf_object__open(const char *path)
                .prog_type      = BPF_PROG_TYPE_UNSPEC,
        };
 
-       return bpf_object__open_xattr(&attr);
+       return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
 }
 
 struct bpf_object *
 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
 {
        if (!path)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        pr_debug("loading %s\n", path);
 
-       return __bpf_object__open(path, NULL, 0, opts);
+       return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts));
 }
 
 struct bpf_object *
@@ -7654,9 +7671,9 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
                     const struct bpf_object_open_opts *opts)
 {
        if (!obj_buf || obj_buf_sz == 0)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
-       return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+       return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts));
 }
 
 struct bpf_object *
@@ -7671,9 +7688,9 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
 
        /* returning NULL is wrong, but backwards-compatible */
        if (!obj_buf || obj_buf_sz == 0)
-               return NULL;
+               return errno = EINVAL, NULL;
 
-       return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+       return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts));
 }
 
 int bpf_object__unload(struct bpf_object *obj)
@@ -7681,7 +7698,7 @@ int bpf_object__unload(struct bpf_object *obj)
        size_t i;
 
        if (!obj)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        for (i = 0; i < obj->nr_maps; i++) {
                zclose(obj->maps[i].fd);
@@ -8014,14 +8031,14 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
        int err, i;
 
        if (!attr)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        obj = attr->obj;
        if (!obj)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (obj->loaded) {
                pr_warn("object '%s': load can't be attempted twice\n", obj->name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (obj->gen_loader)
@@ -8072,7 +8089,7 @@ out:
 
        bpf_object__unload(obj);
        pr_warn("failed to load object '%s'\n", obj->path);
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_object__load(struct bpf_object *obj)
@@ -8144,28 +8161,28 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
 
        err = make_parent_dir(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (prog == NULL) {
                pr_warn("invalid program pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (instance < 0 || instance >= prog->instances.nr) {
                pr_warn("invalid prog instance %d of prog %s (max %d)\n",
                        instance, prog->name, prog->instances.nr);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (bpf_obj_pin(prog->instances.fds[instance], path)) {
                err = -errno;
                cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                pr_warn("failed to pin program: %s\n", cp);
-               return err;
+               return libbpf_err(err);
        }
        pr_debug("pinned program '%s'\n", path);
 
@@ -8179,22 +8196,23 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (prog == NULL) {
                pr_warn("invalid program pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (instance < 0 || instance >= prog->instances.nr) {
                pr_warn("invalid prog instance %d of prog %s (max %d)\n",
                        instance, prog->name, prog->instances.nr);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        err = unlink(path);
        if (err != 0)
-               return -errno;
+               return libbpf_err(-errno);
+
        pr_debug("unpinned program '%s'\n", path);
 
        return 0;
@@ -8206,20 +8224,20 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
 
        err = make_parent_dir(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (prog == NULL) {
                pr_warn("invalid program pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (prog->instances.nr <= 0) {
                pr_warn("no instances of prog %s to pin\n", prog->name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (prog->instances.nr == 1) {
@@ -8263,7 +8281,7 @@ err_unpin:
 
        rmdir(path);
 
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_program__unpin(struct bpf_program *prog, const char *path)
@@ -8272,16 +8290,16 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (prog == NULL) {
                pr_warn("invalid program pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (prog->instances.nr <= 0) {
                pr_warn("no instances of prog %s to pin\n", prog->name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (prog->instances.nr == 1) {
@@ -8295,9 +8313,9 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 
                len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
                if (len < 0)
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                else if (len >= PATH_MAX)
-                       return -ENAMETOOLONG;
+                       return libbpf_err(-ENAMETOOLONG);
 
                err = bpf_program__unpin_instance(prog, buf, i);
                if (err)
@@ -8306,7 +8324,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 
        err = rmdir(path);
        if (err)
-               return -errno;
+               return libbpf_err(-errno);
 
        return 0;
 }
@@ -8318,14 +8336,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 
        if (map == NULL) {
                pr_warn("invalid map pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (map->pin_path) {
                if (path && strcmp(path, map->pin_path)) {
                        pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
                                bpf_map__name(map), map->pin_path, path);
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                } else if (map->pinned) {
                        pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
                                 bpf_map__name(map), map->pin_path);
@@ -8335,10 +8353,10 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
                if (!path) {
                        pr_warn("missing a path to pin map '%s' at\n",
                                bpf_map__name(map));
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                } else if (map->pinned) {
                        pr_warn("map '%s' already pinned\n", bpf_map__name(map));
-                       return -EEXIST;
+                       return libbpf_err(-EEXIST);
                }
 
                map->pin_path = strdup(path);
@@ -8350,11 +8368,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 
        err = make_parent_dir(map->pin_path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = check_path(map->pin_path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (bpf_obj_pin(map->fd, map->pin_path)) {
                err = -errno;
@@ -8369,7 +8387,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 out_err:
        cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
        pr_warn("failed to pin map: %s\n", cp);
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_map__unpin(struct bpf_map *map, const char *path)
@@ -8378,29 +8396,29 @@ int bpf_map__unpin(struct bpf_map *map, const char *path)
 
        if (map == NULL) {
                pr_warn("invalid map pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (map->pin_path) {
                if (path && strcmp(path, map->pin_path)) {
                        pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
                                bpf_map__name(map), map->pin_path, path);
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                }
                path = map->pin_path;
        } else if (!path) {
                pr_warn("no path to unpin map '%s' from\n",
                        bpf_map__name(map));
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = unlink(path);
        if (err != 0)
-               return -errno;
+               return libbpf_err(-errno);
 
        map->pinned = false;
        pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
@@ -8415,7 +8433,7 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
        if (path) {
                new = strdup(path);
                if (!new)
-                       return -errno;
+                       return libbpf_err(-errno);
        }
 
        free(map->pin_path);
@@ -8449,11 +8467,11 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
        int err;
 
        if (!obj)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        if (!obj->loaded) {
                pr_warn("object not yet loaded; load it first\n");
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        }
 
        bpf_object__for_each_map(map, obj) {
@@ -8493,7 +8511,7 @@ err_unpin_maps:
                bpf_map__unpin(map, NULL);
        }
 
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
@@ -8502,7 +8520,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
        int err;
 
        if (!obj)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        bpf_object__for_each_map(map, obj) {
                char *pin_path = NULL;
@@ -8514,9 +8532,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
                        len = snprintf(buf, PATH_MAX, "%s/%s", path,
                                       bpf_map__name(map));
                        if (len < 0)
-                               return -EINVAL;
+                               return libbpf_err(-EINVAL);
                        else if (len >= PATH_MAX)
-                               return -ENAMETOOLONG;
+                               return libbpf_err(-ENAMETOOLONG);
                        sanitize_pin_path(buf);
                        pin_path = buf;
                } else if (!map->pin_path) {
@@ -8525,7 +8543,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 
                err = bpf_map__unpin(map, pin_path);
                if (err)
-                       return err;
+                       return libbpf_err(err);
        }
 
        return 0;
@@ -8537,11 +8555,11 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
        int err;
 
        if (!obj)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        if (!obj->loaded) {
                pr_warn("object not yet loaded; load it first\n");
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        }
 
        bpf_object__for_each_program(prog, obj) {
@@ -8580,7 +8598,7 @@ err_unpin_programs:
                bpf_program__unpin(prog, buf);
        }
 
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
@@ -8589,7 +8607,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
        int err;
 
        if (!obj)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        bpf_object__for_each_program(prog, obj) {
                char buf[PATH_MAX];
@@ -8598,13 +8616,13 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
                len = snprintf(buf, PATH_MAX, "%s/%s", path,
                               prog->pin_name);
                if (len < 0)
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                else if (len >= PATH_MAX)
-                       return -ENAMETOOLONG;
+                       return libbpf_err(-ENAMETOOLONG);
 
                err = bpf_program__unpin(prog, buf);
                if (err)
-                       return err;
+                       return libbpf_err(err);
        }
 
        return 0;
@@ -8616,12 +8634,12 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 
        err = bpf_object__pin_maps(obj, path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = bpf_object__pin_programs(obj, path);
        if (err) {
                bpf_object__unpin_maps(obj, path);
-               return err;
+               return libbpf_err(err);
        }
 
        return 0;
@@ -8718,7 +8736,7 @@ bpf_object__next(struct bpf_object *prev)
 
 const char *bpf_object__name(const struct bpf_object *obj)
 {
-       return obj ? obj->name : ERR_PTR(-EINVAL);
+       return obj ? obj->name : libbpf_err_ptr(-EINVAL);
 }
 
 unsigned int bpf_object__kversion(const struct bpf_object *obj)
@@ -8739,7 +8757,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
 {
        if (obj->loaded)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        obj->kern_version = kern_version;
 
@@ -8759,7 +8777,7 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 
 void *bpf_object__priv(const struct bpf_object *obj)
 {
-       return obj ? obj->priv : ERR_PTR(-EINVAL);
+       return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
 }
 
 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
@@ -8795,7 +8813,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
 
        if (p->obj != obj) {
                pr_warn("error: program handler doesn't match object\n");
-               return NULL;
+               return errno = EINVAL, NULL;
        }
 
        idx = (p - obj->programs) + (forward ? 1 : -1);
@@ -8841,7 +8859,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
 
 void *bpf_program__priv(const struct bpf_program *prog)
 {
-       return prog ? prog->priv : ERR_PTR(-EINVAL);
+       return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
 }
 
 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
@@ -8868,7 +8886,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
                title = strdup(title);
                if (!title) {
                        pr_warn("failed to strdup program title\n");
-                       return ERR_PTR(-ENOMEM);
+                       return libbpf_err_ptr(-ENOMEM);
                }
        }
 
@@ -8883,7 +8901,7 @@ bool bpf_program__autoload(const struct bpf_program *prog)
 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 {
        if (prog->obj->loaded)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        prog->load = autoload;
        return 0;
@@ -8905,17 +8923,17 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
        int *instances_fds;
 
        if (nr_instances <= 0 || !prep)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (prog->instances.nr > 0 || prog->instances.fds) {
                pr_warn("Can't set pre-processor after loading\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        instances_fds = malloc(sizeof(int) * nr_instances);
        if (!instances_fds) {
                pr_warn("alloc memory failed for fds\n");
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
        }
 
        /* fill all fd with -1 */
@@ -8932,19 +8950,19 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
        int fd;
 
        if (!prog)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (n >= prog->instances.nr || n < 0) {
                pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
                        n, prog->name, prog->instances.nr);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        fd = prog->instances.fds[n];
        if (fd < 0) {
                pr_warn("%dth instance of program '%s' is invalid\n",
                        n, prog->name);
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        }
 
        return fd;
@@ -8970,7 +8988,7 @@ static bool bpf_program__is_type(const struct bpf_program *prog,
 int bpf_program__set_##NAME(struct bpf_program *prog)          \
 {                                                              \
        if (!prog)                                              \
-               return -EINVAL;                                 \
+               return libbpf_err(-EINVAL);                     \
        bpf_program__set_type(prog, TYPE);                      \
        return 0;                                               \
 }                                                              \
@@ -9060,7 +9078,10 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
 
 static const struct bpf_sec_def section_defs[] = {
        BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
-       BPF_PROG_SEC("sk_reuseport",            BPF_PROG_TYPE_SK_REUSEPORT),
+       BPF_EAPROG_SEC("sk_reuseport/migrate",  BPF_PROG_TYPE_SK_REUSEPORT,
+                                               BPF_SK_REUSEPORT_SELECT_OR_MIGRATE),
+       BPF_EAPROG_SEC("sk_reuseport",          BPF_PROG_TYPE_SK_REUSEPORT,
+                                               BPF_SK_REUSEPORT_SELECT),
        SEC_DEF("kprobe/", KPROBE,
                .attach_fn = attach_kprobe),
        BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
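
The "sk_reuseport/migrate" section name added above makes libbpf set expected_attach_type to BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, so the program is also consulted when TCP child sockets are migrated off a closing listener. A minimal sketch of such a program, assuming a hypothetical reuseport_map and a trivial always-pick-slot-0 selection policy:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
	__uint(max_entries, 16);
	__type(key, __u32);
	__type(value, __u64);
} reuseport_map SEC(".maps");

SEC("sk_reuseport/migrate")
int select_or_migrate(struct sk_reuseport_md *reuse_md)
{
	__u32 index = 0; /* illustrative policy: always pick slot 0 */

	bpf_sk_select_reuseport(reuse_md, &reuseport_map, &index, 0);
	return SK_PASS;
}

char _license[] SEC("license") = "GPL";
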
@@ -9257,7 +9278,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
        char *type_names;
 
        if (!name)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        sec_def = find_sec_def(name);
        if (sec_def) {
@@ -9273,7 +9294,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
                free(type_names);
        }
 
-       return -ESRCH;
+       return libbpf_err(-ESRCH);
 }
 
 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
@@ -9471,9 +9492,10 @@ int libbpf_find_vmlinux_btf_id(const char *name,
        int err;
 
        btf = libbpf_find_kernel_btf();
-       if (IS_ERR(btf)) {
+       err = libbpf_get_error(btf);
+       if (err) {
                pr_warn("vmlinux BTF is not found\n");
-               return -EINVAL;
+               return libbpf_err(err);
        }
 
        err = find_attach_btf_id(btf, name, attach_type);
@@ -9481,7 +9503,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
                pr_warn("%s is not found in vmlinux BTF\n", name);
 
        btf__free(btf);
-       return err;
+       return libbpf_err(err);
 }
 
 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
@@ -9492,10 +9514,11 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
        int err = -EINVAL;
 
        info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
-       if (IS_ERR_OR_NULL(info_linear)) {
+       err = libbpf_get_error(info_linear);
+       if (err) {
                pr_warn("failed get_prog_info_linear for FD %d\n",
                        attach_prog_fd);
-               return -EINVAL;
+               return err;
        }
        info = &info_linear->info;
        if (!info->btf_id) {
@@ -9616,13 +9639,13 @@ int libbpf_attach_type_by_name(const char *name,
        int i;
 
        if (!name)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
                if (strncmp(name, section_defs[i].sec, section_defs[i].len))
                        continue;
                if (!section_defs[i].is_attachable)
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                *attach_type = section_defs[i].expected_attach_type;
                return 0;
        }
@@ -9633,17 +9656,17 @@ int libbpf_attach_type_by_name(const char *name,
                free(type_names);
        }
 
-       return -EINVAL;
+       return libbpf_err(-EINVAL);
 }
 
 int bpf_map__fd(const struct bpf_map *map)
 {
-       return map ? map->fd : -EINVAL;
+       return map ? map->fd : libbpf_err(-EINVAL);
 }
 
 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
 {
-       return map ? &map->def : ERR_PTR(-EINVAL);
+       return map ? &map->def : libbpf_err_ptr(-EINVAL);
 }
 
 const char *bpf_map__name(const struct bpf_map *map)
@@ -9659,7 +9682,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map)
 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.type = type;
        return 0;
 }
@@ -9672,7 +9695,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map)
 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.map_flags = flags;
        return 0;
 }
@@ -9685,7 +9708,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map)
 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->numa_node = numa_node;
        return 0;
 }
@@ -9698,7 +9721,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map)
 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.key_size = size;
        return 0;
 }
@@ -9711,7 +9734,7 @@ __u32 bpf_map__value_size(const struct bpf_map *map)
 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.value_size = size;
        return 0;
 }
@@ -9730,7 +9753,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
                     bpf_map_clear_priv_t clear_priv)
 {
        if (!map)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (map->priv) {
                if (map->clear_priv)
@@ -9744,7 +9767,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
 
 void *bpf_map__priv(const struct bpf_map *map)
 {
-       return map ? map->priv : ERR_PTR(-EINVAL);
+       return map ? map->priv : libbpf_err_ptr(-EINVAL);
 }
 
 int bpf_map__set_initial_value(struct bpf_map *map,
@@ -9752,7 +9775,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
 {
        if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
            size != map->def.value_size || map->fd >= 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memcpy(map->mmaped, data, size);
        return 0;
@@ -9784,7 +9807,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map)
 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->map_ifindex = ifindex;
        return 0;
 }
@@ -9793,11 +9816,11 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 {
        if (!bpf_map_type__is_map_in_map(map->def.type)) {
                pr_warn("error: unsupported map type\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
        if (map->inner_map_fd != -1) {
                pr_warn("error: inner_map_fd already specified\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
        zfree(&map->inner_map);
        map->inner_map_fd = fd;
@@ -9811,7 +9834,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
        struct bpf_map *s, *e;
 
        if (!obj || !obj->maps)
-               return NULL;
+               return errno = EINVAL, NULL;
 
        s = obj->maps;
        e = obj->maps + obj->nr_maps;
@@ -9819,7 +9842,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
        if ((m < s) || (m >= e)) {
                pr_warn("error in %s: map handler doesn't belong to object\n",
                         __func__);
-               return NULL;
+               return errno = EINVAL, NULL;
        }
 
        idx = (m - obj->maps) + i;
@@ -9858,7 +9881,7 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
                if (pos->name && !strcmp(pos->name, name))
                        return pos;
        }
-       return NULL;
+       return errno = ENOENT, NULL;
 }
 
 int
@@ -9870,12 +9893,23 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
 {
-       return ERR_PTR(-ENOTSUP);
+       return libbpf_err_ptr(-ENOTSUP);
 }
 
 long libbpf_get_error(const void *ptr)
 {
-       return PTR_ERR_OR_ZERO(ptr);
+       if (!IS_ERR_OR_NULL(ptr))
+               return 0;
+
+       if (IS_ERR(ptr))
+               errno = -PTR_ERR(ptr);
+
+       /* If ptr == NULL, then errno should already be set by the failing
+        * API, because libbpf never returns NULL on success and it now always
+        * sets errno on error. So no extra errno handling for ptr == NULL
+        * case.
+        */
+       return -errno;
 }
 
 int bpf_prog_load(const char *file, enum bpf_prog_type type,
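
The reworked libbpf_get_error() keeps a single error-checking idiom working across both conventions: ERR_PTR-encoded pointers in legacy mode, and NULL-plus-errno once LIBBPF_STRICT_CLEAN_PTRS is enabled. A hedged caller-side sketch (the object file name is an assumption):

#include <bpf/libbpf.h>
#include <stdio.h>

int open_object_example(void)
{
	struct bpf_object *obj;
	long err;

	obj = bpf_object__open("prog.bpf.o"); /* hypothetical object file */
	err = libbpf_get_error(obj);          /* handles ERR_PTR and NULL+errno */
	if (err) {
		fprintf(stderr, "failed to open object: %ld\n", err);
		return (int)err;
	}

	bpf_object__close(obj);
	return 0;
}
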
@@ -9901,16 +9935,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        int err;
 
        if (!attr)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (!attr->file)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        open_attr.file = attr->file;
        open_attr.prog_type = attr->prog_type;
 
        obj = bpf_object__open_xattr(&open_attr);
-       if (IS_ERR_OR_NULL(obj))
-               return -ENOENT;
+       err = libbpf_get_error(obj);
+       if (err)
+               return libbpf_err(-ENOENT);
 
        bpf_object__for_each_program(prog, obj) {
                enum bpf_attach_type attach_type = attr->expected_attach_type;
@@ -9930,7 +9965,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                         * didn't provide a fallback type, too bad...
                         */
                        bpf_object__close(obj);
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                }
 
                prog->prog_ifindex = attr->ifindex;
@@ -9948,13 +9983,13 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        if (!first_prog) {
                pr_warn("object file doesn't contain bpf program\n");
                bpf_object__close(obj);
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        }
 
        err = bpf_object__load(obj);
        if (err) {
                bpf_object__close(obj);
-               return err;
+               return libbpf_err(err);
        }
 
        *pobj = obj;
@@ -9973,7 +10008,10 @@ struct bpf_link {
 /* Replace link's underlying BPF program with the new one */
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
-       return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+       int ret;
+
+       ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+       return libbpf_err_errno(ret);
 }
 
 /* Release "ownership" of underlying BPF resource (typically, BPF program
@@ -10006,7 +10044,7 @@ int bpf_link__destroy(struct bpf_link *link)
                free(link->pin_path);
        free(link);
 
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_link__fd(const struct bpf_link *link)
@@ -10021,7 +10059,7 @@ const char *bpf_link__pin_path(const struct bpf_link *link)
 
 static int bpf_link__detach_fd(struct bpf_link *link)
 {
-       return close(link->fd);
+       return libbpf_err_errno(close(link->fd));
 }
 
 struct bpf_link *bpf_link__open(const char *path)
@@ -10033,13 +10071,13 @@ struct bpf_link *bpf_link__open(const char *path)
        if (fd < 0) {
                fd = -errno;
                pr_warn("failed to open link at %s: %d\n", path, fd);
-               return ERR_PTR(fd);
+               return libbpf_err_ptr(fd);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link) {
                close(fd);
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        }
        link->detach = &bpf_link__detach_fd;
        link->fd = fd;
@@ -10047,7 +10085,7 @@ struct bpf_link *bpf_link__open(const char *path)
        link->pin_path = strdup(path);
        if (!link->pin_path) {
                bpf_link__destroy(link);
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        }
 
        return link;
@@ -10063,22 +10101,22 @@ int bpf_link__pin(struct bpf_link *link, const char *path)
        int err;
 
        if (link->pin_path)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        err = make_parent_dir(path);
        if (err)
-               return err;
+               return libbpf_err(err);
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        link->pin_path = strdup(path);
        if (!link->pin_path)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (bpf_obj_pin(link->fd, link->pin_path)) {
                err = -errno;
                zfree(&link->pin_path);
-               return err;
+               return libbpf_err(err);
        }
 
        pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
@@ -10090,11 +10128,11 @@ int bpf_link__unpin(struct bpf_link *link)
        int err;
 
        if (!link->pin_path)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = unlink(link->pin_path);
        if (err != 0)
-               return -errno;
+               return libbpf_err_errno(err);
 
        pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
        zfree(&link->pin_path);
@@ -10110,11 +10148,10 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
                err = -errno;
 
        close(link->fd);
-       return err;
+       return libbpf_err(err);
 }
 
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
-                                               int pfd)
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
 {
        char errmsg[STRERR_BUFSIZE];
        struct bpf_link *link;
@@ -10123,18 +10160,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
        if (pfd < 0) {
                pr_warn("prog '%s': invalid perf event FD %d\n",
                        prog->name, pfd);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
                        prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_perf_event;
        link->fd = pfd;
 
@@ -10146,14 +10183,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
                if (err == -EPROTO)
                        pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
                                prog->name, pfd);
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
        }
        if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
                err = -errno;
                free(link);
                pr_warn("prog '%s': failed to enable pfd %d: %s\n",
                        prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
        }
        return link;
 }
@@ -10277,16 +10314,16 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
                pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
                        prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link = bpf_program__attach_perf_event(prog, pfd);
-       if (IS_ERR(link)) {
+       err = libbpf_get_error(link);
+       if (err) {
                close(pfd);
-               err = PTR_ERR(link);
                pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
                        prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return link;
+               return libbpf_err_ptr(err);
        }
        return link;
 }
@@ -10319,17 +10356,17 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
                        prog->name, retprobe ? "uretprobe" : "uprobe",
                        binary_path, func_offset,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link = bpf_program__attach_perf_event(prog, pfd);
-       if (IS_ERR(link)) {
+       err = libbpf_get_error(link);
+       if (err) {
                close(pfd);
-               err = PTR_ERR(link);
                pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
                        prog->name, retprobe ? "uretprobe" : "uprobe",
                        binary_path, func_offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return link;
+               return libbpf_err_ptr(err);
        }
        return link;
 }
@@ -10397,16 +10434,16 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
                pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
                        prog->name, tp_category, tp_name,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link = bpf_program__attach_perf_event(prog, pfd);
-       if (IS_ERR(link)) {
+       err = libbpf_get_error(link);
+       if (err) {
                close(pfd);
-               err = PTR_ERR(link);
                pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
                        prog->name, tp_category, tp_name,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return link;
+               return libbpf_err_ptr(err);
        }
        return link;
 }
@@ -10419,20 +10456,19 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
 
        sec_name = strdup(prog->sec_name);
        if (!sec_name)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
 
        /* extract "tp/<category>/<name>" */
        tp_cat = sec_name + sec->len;
        tp_name = strchr(tp_cat, '/');
        if (!tp_name) {
-               link = ERR_PTR(-EINVAL);
-               goto out;
+               free(sec_name);
+               return libbpf_err_ptr(-EINVAL);
        }
        *tp_name = '\0';
        tp_name++;
 
        link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
-out:
        free(sec_name);
        return link;
 }
@@ -10447,12 +10483,12 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_fd;
 
        pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
@@ -10461,7 +10497,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
                free(link);
                pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
                        prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link->fd = pfd;
        return link;
@@ -10485,12 +10521,12 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_fd;
 
        pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
@@ -10499,7 +10535,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
                free(link);
                pr_warn("prog '%s': failed to attach: %s\n",
                        prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link->fd = pfd;
        return (struct bpf_link *)link;
@@ -10527,12 +10563,6 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
        return bpf_program__attach_lsm(prog);
 }
 
-static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
-                                   struct bpf_program *prog)
-{
-       return bpf_program__attach_iter(prog, NULL);
-}
-
 static struct bpf_link *
 bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
                       const char *target_name)
@@ -10547,12 +10577,12 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_fd;
 
        attach_type = bpf_program__get_expected_attach_type(prog);
@@ -10563,7 +10593,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
                pr_warn("prog '%s': failed to attach to %s: %s\n",
                        prog->name, target_name,
                        libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(link_fd);
+               return libbpf_err_ptr(link_fd);
        }
        link->fd = link_fd;
        return link;
@@ -10596,19 +10626,19 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
        if (!!target_fd != !!attach_func_name) {
                pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
                        prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        if (prog->type != BPF_PROG_TYPE_EXT) {
                pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
                        prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        if (target_fd) {
                btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
                if (btf_id < 0)
-                       return ERR_PTR(btf_id);
+                       return libbpf_err_ptr(btf_id);
 
                return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
        } else {
@@ -10630,7 +10660,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
        __u32 target_fd = 0;
 
        if (!OPTS_VALID(opts, bpf_iter_attach_opts))
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
        link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
@@ -10638,12 +10668,12 @@ bpf_program__attach_iter(struct bpf_program *prog,
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_fd;
 
        link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
@@ -10653,19 +10683,25 @@ bpf_program__attach_iter(struct bpf_program *prog,
                free(link);
                pr_warn("prog '%s': failed to attach to iterator: %s\n",
                        prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(link_fd);
+               return libbpf_err_ptr(link_fd);
        }
        link->fd = link_fd;
        return link;
 }
 
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+                                   struct bpf_program *prog)
+{
+       return bpf_program__attach_iter(prog, NULL);
+}
+
 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
 {
        const struct bpf_sec_def *sec_def;
 
        sec_def = find_sec_def(prog->sec_name);
        if (!sec_def || !sec_def->attach_fn)
-               return ERR_PTR(-ESRCH);
+               return libbpf_err_ptr(-ESRCH);
 
        return sec_def->attach_fn(sec_def, prog);
 }
@@ -10688,11 +10724,11 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
        int err;
 
        if (!bpf_map__is_struct_ops(map) || map->fd == -1)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        st_ops = map->st_ops;
        for (i = 0; i < btf_vlen(st_ops->type); i++) {
@@ -10712,7 +10748,7 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
        if (err) {
                err = -errno;
                free(link);
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
        }
 
        link->detach = bpf_link__detach_struct_ops;
@@ -10766,7 +10802,7 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
        }
 
        ring_buffer_write_tail(header, data_tail);
-       return ret;
+       return libbpf_err(ret);
 }
 
 struct perf_buffer;
@@ -10919,7 +10955,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
        p.lost_cb = opts ? opts->lost_cb : NULL;
        p.ctx = opts ? opts->ctx : NULL;
 
-       return __perf_buffer__new(map_fd, page_cnt, &p);
+       return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
 
 struct perf_buffer *
@@ -10935,7 +10971,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
        p.cpus = opts->cpus;
        p.map_keys = opts->map_keys;
 
-       return __perf_buffer__new(map_fd, page_cnt, &p);
+       return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
 
 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
@@ -11156,16 +11192,19 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
        int i, cnt, err;
 
        cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+       if (cnt < 0)
+               return libbpf_err_errno(cnt);
+
        for (i = 0; i < cnt; i++) {
                struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
 
                err = perf_buffer__process_records(pb, cpu_buf);
                if (err) {
                        pr_warn("error while processing records: %d\n", err);
-                       return err;
+                       return libbpf_err(err);
                }
        }
-       return cnt < 0 ? -errno : cnt;
+       return cnt;
 }
 
 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
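
With the change above, perf_buffer__poll() reports epoll_wait() failures up front through libbpf_err_errno() and otherwise returns the event count directly. A polling-loop sketch under that contract; the perf_buffer setup and the exiting flag (assumed to be set from a signal handler) are not shown:

#include <bpf/libbpf.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t exiting; /* assumed: set to 1 by a SIGINT handler */

static int poll_loop(struct perf_buffer *pb)
{
	int n;

	while (!exiting) {
		n = perf_buffer__poll(pb, 100 /* timeout, ms */);
		if (n < 0 && errno == EINTR)
			continue; /* interrupted by a signal, retry */
		if (n < 0) {
			fprintf(stderr, "polling failed: %d\n", n);
			return n; /* epoll or record-processing error */
		}
		/* n per-CPU buffers had data and were handed to the callback */
	}
	return 0;
}
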
@@ -11186,11 +11225,11 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
        struct perf_cpu_buf *cpu_buf;
 
        if (buf_idx >= pb->cpu_cnt)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        cpu_buf = pb->cpu_bufs[buf_idx];
        if (!cpu_buf)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        return cpu_buf->fd;
 }
@@ -11208,11 +11247,11 @@ int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
        struct perf_cpu_buf *cpu_buf;
 
        if (buf_idx >= pb->cpu_cnt)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        cpu_buf = pb->cpu_bufs[buf_idx];
        if (!cpu_buf)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        return perf_buffer__process_records(pb, cpu_buf);
 }
@@ -11230,7 +11269,7 @@ int perf_buffer__consume(struct perf_buffer *pb)
                err = perf_buffer__process_records(pb, cpu_buf);
                if (err) {
                        pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
-                       return err;
+                       return libbpf_err(err);
                }
        }
        return 0;
@@ -11342,13 +11381,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
        void *ptr;
 
        if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        /* step 1: get array dimensions */
        err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
        if (err) {
                pr_debug("can't get prog info: %s", strerror(errno));
-               return ERR_PTR(-EFAULT);
+               return libbpf_err_ptr(-EFAULT);
        }
 
        /* step 2: calculate total size of all arrays */
@@ -11380,7 +11419,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
        data_len = roundup(data_len, sizeof(__u64));
        info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
        if (!info_linear)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
 
        /* step 4: fill data to info_linear->info */
        info_linear->arrays = arrays;
@@ -11412,7 +11451,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
        if (err) {
                pr_debug("can't get prog info: %s", strerror(errno));
                free(info_linear);
-               return ERR_PTR(-EFAULT);
+               return libbpf_err_ptr(-EFAULT);
        }
 
        /* step 6: verify the data */
@@ -11491,26 +11530,26 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
        int btf_obj_fd = 0, btf_id = 0, err;
 
        if (!prog || attach_prog_fd < 0 || !attach_func_name)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (prog->obj->loaded)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (attach_prog_fd) {
                btf_id = libbpf_find_prog_btf_id(attach_func_name,
                                                 attach_prog_fd);
                if (btf_id < 0)
-                       return btf_id;
+                       return libbpf_err(btf_id);
        } else {
                /* load btf_vmlinux, if not yet */
                err = bpf_object__load_vmlinux_btf(prog->obj, true);
                if (err)
-                       return err;
+                       return libbpf_err(err);
                err = find_kernel_btf_id(prog->obj, attach_func_name,
                                         prog->expected_attach_type,
                                         &btf_obj_fd, &btf_id);
                if (err)
-                       return err;
+                       return libbpf_err(err);
        }
 
        prog->attach_btf_id = btf_id;
@@ -11609,7 +11648,7 @@ int libbpf_num_possible_cpus(void)
 
        err = parse_cpu_mask_file(fcpu, &mask, &n);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        tmp_cpus = 0;
        for (i = 0; i < n; i++) {
@@ -11629,7 +11668,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
                .object_name = s->name,
        );
        struct bpf_object *obj;
-       int i;
+       int i, err;
 
        /* Attempt to preserve opts->object_name, unless overridden by user
         * explicitly. Overwriting object name for skeletons is discouraged,
@@ -11644,10 +11683,11 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
        }
 
        obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
-       if (IS_ERR(obj)) {
-               pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
-                       s->name, PTR_ERR(obj));
-               return PTR_ERR(obj);
+       err = libbpf_get_error(obj);
+       if (err) {
+               pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
+                       s->name, err);
+               return libbpf_err(err);
        }
 
        *s->obj = obj;
@@ -11660,7 +11700,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
                *map = bpf_object__find_map_by_name(obj, name);
                if (!*map) {
                        pr_warn("failed to find skeleton map '%s'\n", name);
-                       return -ESRCH;
+                       return libbpf_err(-ESRCH);
                }
 
                /* externs shouldn't be pre-setup from user code */
@@ -11675,7 +11715,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
                *prog = bpf_object__find_program_by_name(obj, name);
                if (!*prog) {
                        pr_warn("failed to find skeleton program '%s'\n", name);
-                       return -ESRCH;
+                       return libbpf_err(-ESRCH);
                }
        }
 
@@ -11689,7 +11729,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
        err = bpf_object__load(*s->obj);
        if (err) {
                pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
-               return err;
+               return libbpf_err(err);
        }
 
        for (i = 0; i < s->map_cnt; i++) {
@@ -11728,7 +11768,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
                        *mmaped = NULL;
                        pr_warn("failed to re-mmap() map '%s': %d\n",
                                 bpf_map__name(map), err);
-                       return err;
+                       return libbpf_err(err);
                }
        }
 
@@ -11737,7 +11777,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 
 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 {
-       int i;
+       int i, err;
 
        for (i = 0; i < s->prog_cnt; i++) {
                struct bpf_program *prog = *s->progs[i].prog;
@@ -11752,10 +11792,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
                        continue;
 
                *link = sec_def->attach_fn(sec_def, prog);
-               if (IS_ERR(*link)) {
-                       pr_warn("failed to auto-attach program '%s': %ld\n",
-                               bpf_program__name(prog), PTR_ERR(*link));
-                       return PTR_ERR(*link);
+               err = libbpf_get_error(*link);
+               if (err) {
+                       pr_warn("failed to auto-attach program '%s': %d\n",
+                               bpf_program__name(prog), err);
+                       return libbpf_err(err);
                }
        }
 
index d985235..6e61342 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/bpf.h>
 
 #include "libbpf_common.h"
+#include "libbpf_legacy.h"
 
 #ifdef __cplusplus
 extern "C" {
index 2abef6f..944c99d 100644 (file)
@@ -359,9 +359,7 @@ LIBBPF_0.4.0 {
                bpf_linker__finalize;
                bpf_linker__free;
                bpf_linker__new;
-               bpf_map__initial_value;
                bpf_map__inner_map;
-               bpf_object__gen_loader;
                bpf_object__set_kversion;
                bpf_tc_attach;
                bpf_tc_detach;
@@ -369,3 +367,11 @@ LIBBPF_0.4.0 {
                bpf_tc_hook_destroy;
                bpf_tc_query;
 } LIBBPF_0.3.0;
+
+LIBBPF_0.5.0 {
+       global:
+               bpf_map__initial_value;
+               bpf_map_lookup_and_delete_elem_flags;
+               bpf_object__gen_loader;
+               libbpf_set_strict_mode;
+} LIBBPF_0.4.0;
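
Among the symbols newly exported in LIBBPF_0.5.0, bpf_map_lookup_and_delete_elem_flags() is the userspace counterpart of the kernel-side extension of BPF_MAP_LOOKUP_AND_DELETE_ELEM to the hash map types. A hedged sketch of popping one entry, assuming a hash map with __u32 keys and __u64 values:

#include <bpf/bpf.h>
#include <stdio.h>

int pop_entry(int map_fd, __u32 key)
{
	__u64 value;
	int err;

	/* atomically read and remove the element; 0 == no special flags */
	err = bpf_map_lookup_and_delete_elem_flags(map_fd, &key, &value, 0);
	if (err)
		return err;

	printf("key %u held value %llu\n", key, (unsigned long long)value);
	return 0;
}
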
index 0afb51f..96f67a7 100644 (file)
@@ -12,6 +12,7 @@
 #include <string.h>
 
 #include "libbpf.h"
+#include "libbpf_internal.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -39,7 +40,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
 int libbpf_strerror(int err, char *buf, size_t size)
 {
        if (!buf || !size)
-               return -1;
+               return libbpf_err(-EINVAL);
 
        err = err > 0 ? err : -err;
 
@@ -48,7 +49,7 @@ int libbpf_strerror(int err, char *buf, size_t size)
 
                ret = strerror_r(err, buf, size);
                buf[size - 1] = '\0';
-               return ret;
+               return libbpf_err_errno(ret);
        }
 
        if (err < __LIBBPF_ERRNO__END) {
@@ -62,5 +63,5 @@ int libbpf_strerror(int err, char *buf, size_t size)
 
        snprintf(buf, size, "Unknown libbpf error %d", err);
        buf[size - 1] = '\0';
-       return -1;
+       return libbpf_err(-ENOENT);
 }
index a2cc297..016ca7c 100644 (file)
@@ -11,6 +11,9 @@
 
 #include <stdlib.h>
 #include <limits.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "libbpf_legacy.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
 #ifndef R_BPF_64_64
 #define R_BPF_64_64 1
 #endif
+#ifndef R_BPF_64_ABS64
+#define R_BPF_64_ABS64 2
+#endif
+#ifndef R_BPF_64_ABS32
+#define R_BPF_64_ABS32 3
+#endif
 #ifndef R_BPF_64_32
 #define R_BPF_64_32 10
 #endif
@@ -435,4 +444,54 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct
 int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx);
 int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx);
 
+extern enum libbpf_strict_mode libbpf_mode;
+
+/* handle direct returned errors */
+static inline int libbpf_err(int ret)
+{
+       if (ret < 0)
+               errno = -ret;
+       return ret;
+}
+
+/* handle errno-based (e.g., syscall or libc) errors according to libbpf's
+ * strict mode settings
+ */
+static inline int libbpf_err_errno(int ret)
+{
+       if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS)
+               /* errno is already assumed to be set on error */
+               return ret < 0 ? -errno : ret;
+
+       /* legacy: on error return -1 directly and don't touch errno */
+       return ret;
+}
+
+/* handle error for pointer-returning APIs, err is assumed to be < 0 always */
+static inline void *libbpf_err_ptr(int err)
+{
+       /* set errno on error; this doesn't break anything */
+       errno = -err;
+
+       if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+               return NULL;
+
+       /* legacy: encode err as ptr */
+       return ERR_PTR(err);
+}
+
+/* handle pointer-returning APIs' error handling */
+static inline void *libbpf_ptr(void *ret)
+{
+       /* set errno on error; this doesn't break anything */
+       if (IS_ERR(ret))
+               errno = -PTR_ERR(ret);
+
+       if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+               return IS_ERR(ret) ? NULL : ret;
+
+       /* legacy: pass-through original pointer */
+       return ret;
+}
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
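
Taken together, these helpers give each kind of API a single wrapper: libbpf_err() for computed error codes, libbpf_err_errno() for syscall-style -1/errno results, and libbpf_err_ptr()/libbpf_ptr() for pointer returns. A sketch of how code inside libbpf composes them; the struct and both functions are illustrative, not real libbpf entry points, and assume this internal header is in scope:

#include <errno.h>
#include <stdlib.h>

struct ctx { int val; }; /* illustrative */

/* computed error: errno gets set either way, return value per strict mode */
static int example_set_val(struct ctx *c, int val)
{
	if (!c)
		return libbpf_err(-EINVAL);
	c->val = val;
	return 0;
}

/* pointer return: NULL or ERR_PTR(-ENOMEM) depending on LIBBPF_STRICT_CLEAN_PTRS */
static struct ctx *example_new(void)
{
	struct ctx *c = calloc(1, sizeof(*c));

	if (!c)
		return libbpf_err_ptr(-ENOMEM);
	return c;
}
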
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
new file mode 100644 (file)
index 0000000..df0d03d
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Libbpf legacy APIs (either discouraged or deprecated, as mentioned in [0])
+ *
+ *   [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY
+ *
+ * Copyright (C) 2021 Facebook
+ */
+#ifndef __LIBBPF_LEGACY_BPF_H
+#define __LIBBPF_LEGACY_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "libbpf_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum libbpf_strict_mode {
+       /* Turn on all supported strict features of libbpf to simulate libbpf
+        * v1.0 behavior.
+        * This will be the default behavior in libbpf v1.0.
+        */
+       LIBBPF_STRICT_ALL = 0xffffffff,
+
+       /*
+        * Disable any libbpf 1.0 behaviors. This is the default before libbpf
+        * v1.0. It won't be supported anymore in v1.0; please update your
+        * code so that it handles LIBBPF_STRICT_ALL mode before libbpf v1.0.
+        */
+       LIBBPF_STRICT_NONE = 0x00,
+       /*
+        * Return NULL pointers on error, not ERR_PTR(err).
+        * Additionally, libbpf also always sets errno to corresponding Exx
+        * (positive) error code.
+        */
+       LIBBPF_STRICT_CLEAN_PTRS = 0x01,
+       /*
+        * Return actual error codes from low-level APIs directly, not just -1.
+        * Additionally, libbpf also always sets errno to corresponding Exx
+        * (positive) error code.
+        */
+       LIBBPF_STRICT_DIRECT_ERRS = 0x02,
+
+       __LIBBPF_STRICT_LAST,
+};
+
+LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_LEGACY_BPF_H */
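
An application can opt into the v1.0 semantics ahead of time; with LIBBPF_STRICT_CLEAN_PTRS in effect, pointer-returning APIs hand back plain NULL with errno set, so no IS_ERR() decoding is needed. A hedged sketch (the object file name is an assumption):

#include <bpf/libbpf.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
	struct bpf_object *obj;

	libbpf_set_strict_mode(LIBBPF_STRICT_ALL); /* simulate libbpf v1.0 */

	obj = bpf_object__open("prog.bpf.o"); /* hypothetical object file */
	if (!obj) { /* plain NULL on error, errno carries the Exx code */
		fprintf(stderr, "failed to open object: %d\n", errno);
		return 1;
	}

	bpf_object__close(obj);
	return 0;
}
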
index b594a88..10911a8 100644 (file)
@@ -220,16 +220,16 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
        int err;
 
        if (!OPTS_VALID(opts, bpf_linker_opts))
-               return NULL;
+               return errno = EINVAL, NULL;
 
        if (elf_version(EV_CURRENT) == EV_NONE) {
                pr_warn_elf("libelf initialization failed");
-               return NULL;
+               return errno = EINVAL, NULL;
        }
 
        linker = calloc(1, sizeof(*linker));
        if (!linker)
-               return NULL;
+               return errno = ENOMEM, NULL;
 
        linker->fd = -1;
 
@@ -241,7 +241,7 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
 
 err_out:
        bpf_linker__free(linker);
-       return NULL;
+       return errno = -err, NULL;
 }
 
 static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name)
@@ -444,10 +444,10 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
        int err = 0;
 
        if (!OPTS_VALID(opts, bpf_linker_file_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (!linker->elf)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = err ?: linker_load_obj_file(linker, filename, opts, &obj);
        err = err ?: linker_append_sec_data(linker, &obj);
@@ -467,7 +467,7 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
        if (obj.fd >= 0)
                close(obj.fd);
 
-       return err;
+       return libbpf_err(err);
 }
 
 static bool is_dwarf_sec_name(const char *name)
@@ -892,7 +892,8 @@ static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *se
                size_t sym_idx = ELF64_R_SYM(relo->r_info);
                size_t sym_type = ELF64_R_TYPE(relo->r_info);
 
-               if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+               if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32 &&
+                   sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32) {
                        pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
                                i, sec->sec_idx, sym_type, obj->filename);
                        return -EINVAL;
@@ -2547,11 +2548,11 @@ int bpf_linker__finalize(struct bpf_linker *linker)
        int err, i;
 
        if (!linker->elf)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = finalize_btf(linker);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        /* Finalize strings */
        strs_sz = strset__data_size(linker->strtab_strs);
@@ -2583,14 +2584,14 @@ int bpf_linker__finalize(struct bpf_linker *linker)
        if (elf_update(linker->elf, ELF_C_NULL) < 0) {
                err = -errno;
                pr_warn_elf("failed to finalize ELF layout");
-               return err;
+               return libbpf_err(err);
        }
 
        /* Write out final ELF contents */
        if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
                err = -errno;
                pr_warn_elf("failed to write ELF contents");
-               return err;
+               return libbpf_err(err);
        }
 
        elf_end(linker->elf);
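
With these changes the linker APIs follow the same contract: bpf_linker__new()
returns NULL with errno set, and the int-returning entry points go through
libbpf_err(). A rough usage sketch (file names are placeholders):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <bpf/libbpf.h>

    static int link_one(const char *out_path, const char *in_path)
    {
            struct bpf_linker *linker;
            int err;

            linker = bpf_linker__new(out_path, NULL);
            if (!linker) /* errno holds EINVAL, ENOMEM, ... */
                    return -errno;

            err = bpf_linker__add_file(linker, in_path, NULL);
            err = err ?: bpf_linker__finalize(linker);
            if (err < 0) /* -Exx in strict mode, -1 + errno otherwise */
                    fprintf(stderr, "link failed: %s\n", strerror(errno));

            bpf_linker__free(linker);
            return err;
    }
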
index 4744458..cf9381f 100644 (file)
@@ -225,22 +225,26 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                             const struct bpf_xdp_set_link_opts *opts)
 {
-       int old_fd = -1;
+       int old_fd = -1, ret;
 
        if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (OPTS_HAS(opts, old_fd)) {
                old_fd = OPTS_GET(opts, old_fd, -1);
                flags |= XDP_FLAGS_REPLACE;
        }
 
-       return __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
+       ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
+       return libbpf_err(ret);
 }
 
 int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
 {
-       return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+       int ret;
+
+       ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+       return libbpf_err(ret);
 }
 
 static int __dump_link_nlmsg(struct nlmsghdr *nlh,
@@ -321,13 +325,13 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
        };
 
        if (flags & ~XDP_FLAGS_MASK || !info_size)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* Check whether the single {HW,DRV,SKB} mode is set */
        flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
        mask = flags - 1;
        if (flags && flags & mask)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        xdp_id.ifindex = ifindex;
        xdp_id.flags = flags;
@@ -341,7 +345,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
                memset((void *) info + sz, 0, info_size - sz);
        }
 
-       return ret;
+       return libbpf_err(ret);
 }
 
 static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
@@ -369,7 +373,7 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
        if (!ret)
                *prog_id = get_xdp_id(&info, flags);
 
-       return ret;
+       return libbpf_err(ret);
 }
 
 typedef int (*qdisc_config_t)(struct nlmsghdr *nh, struct tcmsg *t,
@@ -453,7 +457,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
 
 static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
 {
-       return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE);
+       return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL);
 }
 
 static int tc_qdisc_delete(struct bpf_tc_hook *hook)
@@ -463,11 +467,14 @@ static int tc_qdisc_delete(struct bpf_tc_hook *hook)
 
 int bpf_tc_hook_create(struct bpf_tc_hook *hook)
 {
+       int ret;
+
        if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
            OPTS_GET(hook, ifindex, 0) <= 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
-       return tc_qdisc_create_excl(hook);
+       ret = tc_qdisc_create_excl(hook);
+       return libbpf_err(ret);
 }
 
 static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
@@ -478,18 +485,18 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
 {
        if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
            OPTS_GET(hook, ifindex, 0) <= 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        switch (OPTS_GET(hook, attach_point, 0)) {
        case BPF_TC_INGRESS:
        case BPF_TC_EGRESS:
-               return __bpf_tc_detach(hook, NULL, true);
+               return libbpf_err(__bpf_tc_detach(hook, NULL, true));
        case BPF_TC_INGRESS | BPF_TC_EGRESS:
-               return tc_qdisc_delete(hook);
+               return libbpf_err(tc_qdisc_delete(hook));
        case BPF_TC_CUSTOM:
-               return -EOPNOTSUPP;
+               return libbpf_err(-EOPNOTSUPP);
        default:
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 }
 
@@ -574,7 +581,7 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
        if (!hook || !opts ||
            !OPTS_VALID(hook, bpf_tc_hook) ||
            !OPTS_VALID(opts, bpf_tc_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        ifindex      = OPTS_GET(hook, ifindex, 0);
        parent       = OPTS_GET(hook, parent, 0);
@@ -587,11 +594,11 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
        flags        = OPTS_GET(opts, flags, 0);
 
        if (ifindex <= 0 || !prog_fd || prog_id)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (priority > UINT16_MAX)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (flags & ~BPF_TC_F_REPLACE)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
        protocol = ETH_P_ALL;
@@ -608,32 +615,32 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 
        ret = tc_get_tcm_parent(attach_point, &parent);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        req.tc.tcm_parent = parent;
 
        ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS);
        if (!nla)
-               return -EMSGSIZE;
+               return libbpf_err(-EMSGSIZE);
        ret = tc_add_fd_and_name(&req.nh, sizeof(req), prog_fd);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
        ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, &bpf_flags,
                         sizeof(bpf_flags));
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        nlattr_end_nested(&req.nh, nla);
 
        info.opts = opts;
 
        ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        if (!info.processed)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        return ret;
 }
 
@@ -668,8 +675,6 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
                return -EINVAL;
        if (priority > UINT16_MAX)
                return -EINVAL;
-       if (flags & ~BPF_TC_F_REPLACE)
-               return -EINVAL;
        if (!flush) {
                if (!handle || !priority)
                        return -EINVAL;
@@ -708,7 +713,13 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
 int bpf_tc_detach(const struct bpf_tc_hook *hook,
                  const struct bpf_tc_opts *opts)
 {
-       return !opts ? -EINVAL : __bpf_tc_detach(hook, opts, false);
+       int ret;
+
+       if (!opts)
+               return libbpf_err(-EINVAL);
+
+       ret = __bpf_tc_detach(hook, opts, false);
+       return libbpf_err(ret);
 }
 
 int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
@@ -725,7 +736,7 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
        if (!hook || !opts ||
            !OPTS_VALID(hook, bpf_tc_hook) ||
            !OPTS_VALID(opts, bpf_tc_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        ifindex      = OPTS_GET(hook, ifindex, 0);
        parent       = OPTS_GET(hook, parent, 0);
@@ -739,9 +750,9 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 
        if (ifindex <= 0 || flags || prog_fd || prog_id ||
            !handle || !priority)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (priority > UINT16_MAX)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        protocol = ETH_P_ALL;
 
@@ -756,19 +767,19 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 
        ret = tc_get_tcm_parent(attach_point, &parent);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        req.tc.tcm_parent = parent;
 
        ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
 
        info.opts = opts;
 
        ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        if (!info.processed)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        return ret;
 }
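
Besides the error-reporting conversion, note the NLM_F_EXCL fix above:
bpf_tc_hook_create() now reports an already-installed clsact qdisc as a
failure instead of silently reusing it. A rough sketch of the resulting usage
pattern, assuming LIBBPF_STRICT_DIRECT_ERRS is enabled so errors come back as
-Exx codes:

    #include <errno.h>
    #include <bpf/libbpf.h>

    /* ifindex and prog_fd are assumed to be supplied by the caller */
    static int attach_tc_ingress(int ifindex, int prog_fd)
    {
            DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
                                .attach_point = BPF_TC_INGRESS);
            DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1,
                                .priority = 1, .prog_fd = prog_fd);
            int err;

            /* -EEXIST now means the qdisc was already set up, which
             * callers typically treat as success
             */
            err = bpf_tc_hook_create(&hook);
            if (err && err != -EEXIST)
                    return err;

            return bpf_tc_attach(&hook, &opts);
    }
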
index 1d80ad4..8bc117b 100644 (file)
@@ -69,23 +69,23 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
                err = -errno;
                pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
                        map_fd, err);
-               return err;
+               return libbpf_err(err);
        }
 
        if (info.type != BPF_MAP_TYPE_RINGBUF) {
                pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
                        map_fd);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
        if (!tmp)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
        rb->rings = tmp;
 
        tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
        if (!tmp)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
        rb->events = tmp;
 
        r = &rb->rings[rb->ring_cnt];
@@ -103,7 +103,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
                err = -errno;
                pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
                        map_fd, err);
-               return err;
+               return libbpf_err(err);
        }
        r->consumer_pos = tmp;
 
@@ -118,7 +118,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
                ringbuf_unmap_ring(rb, r);
                pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
                        map_fd, err);
-               return err;
+               return libbpf_err(err);
        }
        r->producer_pos = tmp;
        r->data = tmp + rb->page_size;
@@ -133,7 +133,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
                ringbuf_unmap_ring(rb, r);
                pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
                        map_fd, err);
-               return err;
+               return libbpf_err(err);
        }
 
        rb->ring_cnt++;
@@ -165,11 +165,11 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
        int err;
 
        if (!OPTS_VALID(opts, ring_buffer_opts))
-               return NULL;
+               return errno = EINVAL, NULL;
 
        rb = calloc(1, sizeof(*rb));
        if (!rb)
-               return NULL;
+               return errno = ENOMEM, NULL;
 
        rb->page_size = getpagesize();
 
@@ -188,7 +188,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
 
 err_out:
        ring_buffer__free(rb);
-       return NULL;
+       return errno = -err, NULL;
 }
 
 static inline int roundup_len(__u32 len)
@@ -260,7 +260,7 @@ int ring_buffer__consume(struct ring_buffer *rb)
 
                err = ringbuf_process_ring(ring);
                if (err < 0)
-                       return err;
+                       return libbpf_err(err);
                res += err;
        }
        if (res > INT_MAX)
@@ -279,7 +279,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 
        cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
        if (cnt < 0)
-               return -errno;
+               return libbpf_err(-errno);
 
        for (i = 0; i < cnt; i++) {
                __u32 ring_id = rb->events[i].data.fd;
@@ -287,7 +287,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 
                err = ringbuf_process_ring(ring);
                if (err < 0)
-                       return err;
+                       return libbpf_err(err);
                res += err;
        }
        if (res > INT_MAX)
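
The ring buffer APIs follow suit: ring_buffer__new() returns NULL with errno
set, and the polling/consuming entry points report errors via libbpf_err().
A minimal sketch:

    #include <errno.h>
    #include <stddef.h>
    #include <bpf/libbpf.h>

    static int on_sample(void *ctx, void *data, size_t size)
    {
            return 0; /* consumed; keep going */
    }

    /* map_fd is assumed to be a BPF_MAP_TYPE_RINGBUF map fd */
    static int drain(int map_fd)
    {
            struct ring_buffer *rb;
            int err;

            rb = ring_buffer__new(map_fd, on_sample, NULL, NULL);
            if (!rb) /* EINVAL, ENOMEM, ... in errno */
                    return -errno;

            err = ring_buffer__poll(rb, 100 /* timeout_ms */);
            ring_buffer__free(rb);
            return err;
    }
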
index a030aa4..addcfd8 100644 (file)
@@ -10,6 +10,7 @@ FEATURE-DUMP.libbpf
 fixdep
 test_dev_cgroup
 /test_progs*
+!test_progs.h
 test_verifier_log
 feature
 test_sock
@@ -37,4 +38,6 @@ test_cpp
 /runqslower
 /bench
 *.ko
+*.tmp
 xdpxceiver
+xdp_redirect_multi
index 525e4b3..f405b20 100644 (file)
@@ -54,6 +54,7 @@ TEST_FILES = xsk_prereqs.sh \
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
        test_xdp_redirect.sh \
+       test_xdp_redirect_multi.sh \
        test_xdp_meta.sh \
        test_xdp_veth.sh \
        test_offload.py \
@@ -84,7 +85,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
        flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
        test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
-       xdpxceiver
+       xdpxceiver xdp_redirect_multi
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 
index ccf2600..eb6a4fe 100644 (file)
@@ -52,7 +52,8 @@ $(OUTPUT)%.$2: $(OUTPUT)%.rst
 ifndef RST2MAN_DEP
        $$(error "rst2man not found, but required to generate man pages")
 endif
-       $$(QUIET_GEN)rst2man $$< > $$@
+       $$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp
+       $$(QUIET_GEN)mv $$@.tmp $$@
 
 docs-clean-$1:
        $$(call QUIET_CLEAN, eBPF_$1-manpage)
index 3353778..8deec1c 100644 (file)
@@ -202,3 +202,22 @@ generate valid BTF information for weak variables. Please make sure you use
 Clang that contains the fix.
 
 __ https://reviews.llvm.org/D100362
+
+Clang relocation changes
+========================
+
+The Clang 13 patch `clang reloc patch`_ made some changes to relocations:
+existing relocation types are split into more types, and each new type
+corresponds to exactly one way of resolving the relocation.
+See `kernel llvm reloc`_ for more explanation and some examples.
+Using clang 13 together with an older libbpf that has static linker
+support will result in a failure like::
+
+  libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o
+
+Here, ``type 2`` refers to the new relocation type ``R_BPF_64_ABS64``.
+To fix this issue, use a newer libbpf.
+
+.. Links
+.. _clang reloc patch: https://reviews.llvm.org/D102712
+.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
index 332ed2f..6ea15b9 100644 (file)
@@ -43,6 +43,7 @@ void setup_libbpf()
 {
        int err;
 
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
        libbpf_set_print(libbpf_print_fn);
 
        err = bump_memlock_rlimit();
index a967674..c7ec114 100644 (file)
@@ -65,7 +65,7 @@ static void attach_bpf(struct bpf_program *prog)
        struct bpf_link *link;
 
        link = bpf_program__attach(prog);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program!\n");
                exit(1);
        }
index bde6c9d..d167bff 100644 (file)
@@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup()
        }
 
        link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program!\n");
                exit(1);
        }
@@ -271,7 +271,7 @@ static void ringbuf_custom_setup()
        }
 
        link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program\n");
                exit(1);
        }
@@ -430,7 +430,7 @@ static void perfbuf_libbpf_setup()
        }
 
        link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program\n");
                exit(1);
        }
index 2a0b6c9..f41a491 100644 (file)
@@ -60,7 +60,7 @@ static void attach_bpf(struct bpf_program *prog)
        struct bpf_link *link;
 
        link = bpf_program__attach(prog);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program!\n");
                exit(1);
        }
index 9dc4e3d..ec11e20 100644 (file)
@@ -85,16 +85,14 @@ void test_attach_probe(void)
        kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
                                                 false /* retprobe */,
                                                 SYS_NANOSLEEP_KPROBE_NAME);
-       if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
-                 "err %ld\n", PTR_ERR(kprobe_link)))
+       if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
                goto cleanup;
        skel->links.handle_kprobe = kprobe_link;
 
        kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
                                                    true /* retprobe */,
                                                    SYS_NANOSLEEP_KPROBE_NAME);
-       if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
-                 "err %ld\n", PTR_ERR(kretprobe_link)))
+       if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
                goto cleanup;
        skel->links.handle_kretprobe = kretprobe_link;
 
@@ -103,8 +101,7 @@ void test_attach_probe(void)
                                                 0 /* self pid */,
                                                 "/proc/self/exe",
                                                 uprobe_offset);
-       if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
-                 "err %ld\n", PTR_ERR(uprobe_link)))
+       if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
                goto cleanup;
        skel->links.handle_uprobe = uprobe_link;
 
@@ -113,8 +110,7 @@ void test_attach_probe(void)
                                                    -1 /* any pid */,
                                                    "/proc/self/exe",
                                                    uprobe_offset);
-       if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
-                 "err %ld\n", PTR_ERR(uretprobe_link)))
+       if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
                goto cleanup;
        skel->links.handle_uretprobe = uretprobe_link;
 
index 2d3590c..1f1aade 100644 (file)
@@ -47,7 +47,7 @@ static void do_dummy_read(struct bpf_program *prog)
        int iter_fd, len;
 
        link = bpf_program__attach_iter(prog, NULL);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                return;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -201,7 +201,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
        int ret = 0;
 
        link = bpf_program__attach_iter(prog, NULL);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                return ret;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -396,7 +396,7 @@ static void test_file_iter(void)
                return;
 
        link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        /* unlink this path if it exists. */
@@ -502,7 +502,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
        skel->bss->map2_id = map_info.id;
 
        link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto free_map2;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -607,14 +607,12 @@ static void test_bpf_hash_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-       if (CHECK(!IS_ERR(link), "attach_iter",
-                 "attach_iter for hashmap2 unexpected succeeded\n"))
+       if (!ASSERT_ERR_PTR(link, "attach_iter"))
                goto out;
 
        linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-       if (CHECK(!IS_ERR(link), "attach_iter",
-                 "attach_iter for hashmap3 unexpected succeeded\n"))
+       if (!ASSERT_ERR_PTR(link, "attach_iter"))
                goto out;
 
        /* hashmap1 should be good, update map values here */
@@ -636,7 +634,7 @@ static void test_bpf_hash_map(void)
 
        linfo.map.map_fd = map_fd;
        link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -727,7 +725,7 @@ static void test_bpf_percpu_hash_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -798,7 +796,7 @@ static void test_bpf_array_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -894,7 +892,7 @@ static void test_bpf_percpu_array_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -957,7 +955,7 @@ static void test_bpf_sk_storage_delete(void)
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
                                        &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1075,7 +1073,7 @@ static void test_bpf_sk_storage_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1128,7 +1126,7 @@ static void test_rdonly_buf_out_of_bound(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-       if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
+       if (!ASSERT_ERR_PTR(link, "attach_iter"))
                bpf_link__destroy(link);
 
        bpf_iter_test_kern5__destroy(skel);
@@ -1186,8 +1184,7 @@ static void test_task_vma(void)
        skel->links.proc_maps = bpf_program__attach_iter(
                skel->progs.proc_maps, NULL);
 
-       if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
-                 "attach iterator failed\n")) {
+       if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
                skel->links.proc_maps = NULL;
                goto out;
        }
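
The ASSERT_OK_PTR()/ASSERT_ERR_PTR() helpers used in these conversions come
from test_progs.h and classify a pointer through libbpf_get_error(), so they
catch both NULL (strict mode) and ERR_PTR-encoded (legacy) failures. A
simplified, hypothetical equivalent of the OK variant, just to illustrate the
shape:

    /* not the real macro; see tools/testing/selftests/bpf/test_progs.h */
    #define ASSERT_OK_PTR(ptr, name) ({                              \
            const void *__p = (ptr);                                 \
            long __err = libbpf_get_error(__p);                      \
            if (__err)                                               \
                    fprintf(stderr, "%s: unexpected error %ld\n",    \
                            (name), __err);                          \
            __err == 0;                                              \
    })
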
index e25917f..efe1e97 100644 (file)
@@ -82,7 +82,7 @@ static void *server(void *arg)
              bytes, total_bytes, nr_sent, errno);
 
 done:
-       if (fd != -1)
+       if (fd >= 0)
                close(fd);
        if (err) {
                WRITE_ONCE(stop, 1);
@@ -191,8 +191,7 @@ static void test_cubic(void)
                return;
 
        link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
-       if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
-                 PTR_ERR(link))) {
+       if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
                bpf_cubic__destroy(cubic_skel);
                return;
        }
@@ -213,8 +212,7 @@ static void test_dctcp(void)
                return;
 
        link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
-       if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
-                 PTR_ERR(link))) {
+       if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
                bpf_dctcp__destroy(dctcp_skel);
                return;
        }
index 0457ae3..857e3f2 100644 (file)
@@ -3811,7 +3811,7 @@ static void do_test_raw(unsigned int test_num)
                              always_log);
        free(raw_btf);
 
-       err = ((btf_fd == -1) != test->btf_load_err);
+       err = ((btf_fd < 0) != test->btf_load_err);
        if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
                  btf_fd, test->btf_load_err) ||
            CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -3820,7 +3820,7 @@ static void do_test_raw(unsigned int test_num)
                goto done;
        }
 
-       if (err || btf_fd == -1)
+       if (err || btf_fd < 0)
                goto done;
 
        create_attr.name = test->map_name;
@@ -3834,16 +3834,16 @@ static void do_test_raw(unsigned int test_num)
 
        map_fd = bpf_create_map_xattr(&create_attr);
 
-       err = ((map_fd == -1) != test->map_create_err);
+       err = ((map_fd < 0) != test->map_create_err);
        CHECK(err, "map_fd:%d test->map_create_err:%u",
              map_fd, test->map_create_err);
 
 done:
        if (*btf_log_buf && (err || always_log))
                fprintf(stderr, "\n%s", btf_log_buf);
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
-       if (map_fd != -1)
+       if (map_fd >= 0)
                close(map_fd);
 }
 
@@ -3941,7 +3941,7 @@ static int test_big_btf_info(unsigned int test_num)
        btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
                              btf_log_buf, BTF_LOG_BUF_SIZE,
                              always_log);
-       if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -3987,7 +3987,7 @@ done:
        free(raw_btf);
        free(user_btf);
 
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
 
        return err;
@@ -4029,7 +4029,7 @@ static int test_btf_id(unsigned int test_num)
        btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
                                 btf_log_buf, BTF_LOG_BUF_SIZE,
                                 always_log);
-       if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4043,7 +4043,7 @@ static int test_btf_id(unsigned int test_num)
        }
 
        btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
-       if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4071,7 +4071,7 @@ static int test_btf_id(unsigned int test_num)
        create_attr.btf_value_type_id = 2;
 
        map_fd = bpf_create_map_xattr(&create_attr);
-       if (CHECK(map_fd == -1, "errno:%d", errno)) {
+       if (CHECK(map_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4094,7 +4094,7 @@ static int test_btf_id(unsigned int test_num)
 
        /* Test BTF ID is removed from the kernel */
        btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
-       if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4105,7 +4105,7 @@ static int test_btf_id(unsigned int test_num)
        close(map_fd);
        map_fd = -1;
        btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
-       if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+       if (CHECK(btf_fd[0] >= 0, "BTF lingers")) {
                err = -1;
                goto done;
        }
@@ -4117,11 +4117,11 @@ done:
                fprintf(stderr, "\n%s", btf_log_buf);
 
        free(raw_btf);
-       if (map_fd != -1)
+       if (map_fd >= 0)
                close(map_fd);
        for (i = 0; i < 2; i++) {
                free(user_btf[i]);
-               if (btf_fd[i] != -1)
+               if (btf_fd[i] >= 0)
                        close(btf_fd[i]);
        }
 
@@ -4166,7 +4166,7 @@ static void do_test_get_info(unsigned int test_num)
        btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
                              btf_log_buf, BTF_LOG_BUF_SIZE,
                              always_log);
-       if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4212,7 +4212,7 @@ done:
        free(raw_btf);
        free(user_btf);
 
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
 }
 
@@ -4249,8 +4249,9 @@ static void do_test_file(unsigned int test_num)
                return;
 
        btf = btf__parse_elf(test->file, &btf_ext);
-       if (IS_ERR(btf)) {
-               if (PTR_ERR(btf) == -ENOENT) {
+       err = libbpf_get_error(btf);
+       if (err) {
+               if (err == -ENOENT) {
                        printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
                        test__skip();
                        return;
@@ -4263,7 +4264,8 @@ static void do_test_file(unsigned int test_num)
        btf_ext__free(btf_ext);
 
        obj = bpf_object__open(test->file);
-       if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+       err = libbpf_get_error(obj);
+       if (CHECK(err, "obj: %d", err))
                return;
 
        prog = bpf_program__next(NULL, obj);
@@ -4298,7 +4300,7 @@ static void do_test_file(unsigned int test_num)
        info_len = sizeof(struct bpf_prog_info);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
 
-       if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+       if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
                fprintf(stderr, "%s\n", btf_log_buf);
                err = -1;
                goto done;
@@ -4330,7 +4332,7 @@ static void do_test_file(unsigned int test_num)
 
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
 
-       if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+       if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
                fprintf(stderr, "%s\n", btf_log_buf);
                err = -1;
                goto done;
@@ -4886,7 +4888,7 @@ static void do_test_pprint(int test_num)
                              always_log);
        free(raw_btf);
 
-       if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4901,7 +4903,7 @@ static void do_test_pprint(int test_num)
        create_attr.btf_value_type_id = test->value_type_id;
 
        map_fd = bpf_create_map_xattr(&create_attr);
-       if (CHECK(map_fd == -1, "errno:%d", errno)) {
+       if (CHECK(map_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4982,7 +4984,7 @@ static void do_test_pprint(int test_num)
 
                                        err = check_line(expected_line, nexpected_line,
                                                         sizeof(expected_line), line);
-                                       if (err == -1)
+                                       if (err < 0)
                                                goto done;
                                }
 
@@ -4998,7 +5000,7 @@ static void do_test_pprint(int test_num)
                                                                  cpu, cmapv);
                        err = check_line(expected_line, nexpected_line,
                                         sizeof(expected_line), line);
-                       if (err == -1)
+                       if (err < 0)
                                goto done;
 
                        cmapv = cmapv + rounded_value_size;
@@ -5036,9 +5038,9 @@ done:
                fprintf(stderr, "OK");
        if (*btf_log_buf && (err || always_log))
                fprintf(stderr, "\n%s", btf_log_buf);
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
-       if (map_fd != -1)
+       if (map_fd >= 0)
                close(map_fd);
        if (pin_file)
                fclose(pin_file);
@@ -5950,7 +5952,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
        /* get necessary lens */
        info_len = sizeof(struct bpf_prog_info);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-       if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+       if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
                fprintf(stderr, "%s\n", btf_log_buf);
                return -1;
        }
@@ -5980,7 +5982,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
        info.func_info_rec_size = rec_size;
        info.func_info = ptr_to_u64(func_info);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-       if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+       if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
                fprintf(stderr, "%s\n", btf_log_buf);
                err = -1;
                goto done;
@@ -6044,7 +6046,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
 
        info_len = sizeof(struct bpf_prog_info);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-       if (CHECK(err == -1, "err:%d errno:%d", err, errno)) {
+       if (CHECK(err < 0, "err:%d errno:%d", err, errno)) {
                err = -1;
                goto done;
        }
@@ -6123,7 +6125,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
         * Only recheck the info.*line_info* fields.
         * Other fields are not the concern of this test.
         */
-       if (CHECK(err == -1 ||
+       if (CHECK(err < 0 ||
                  info.nr_line_info != cnt ||
                  (jited_cnt && !info.jited_line_info) ||
                  info.nr_jited_line_info != jited_cnt ||
@@ -6260,7 +6262,7 @@ static void do_test_info_raw(unsigned int test_num)
                              always_log);
        free(raw_btf);
 
-       if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
+       if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -6273,7 +6275,8 @@ static void do_test_info_raw(unsigned int test_num)
        patched_linfo = patch_name_tbd(test->line_info,
                                       test->str_sec, linfo_str_off,
                                       test->str_sec_size, &linfo_size);
-       if (IS_ERR(patched_linfo)) {
+       err = libbpf_get_error(patched_linfo);
+       if (err) {
                fprintf(stderr, "error in creating raw bpf_line_info");
                err = -1;
                goto done;
@@ -6297,7 +6300,7 @@ static void do_test_info_raw(unsigned int test_num)
        }
 
        prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
-       err = ((prog_fd == -1) != test->expected_prog_load_failure);
+       err = ((prog_fd < 0) != test->expected_prog_load_failure);
        if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d",
                  prog_fd, test->expected_prog_load_failure, errno) ||
            CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -6306,7 +6309,7 @@ static void do_test_info_raw(unsigned int test_num)
                goto done;
        }
 
-       if (prog_fd == -1)
+       if (prog_fd < 0)
                goto done;
 
        err = test_get_finfo(test, prog_fd);
@@ -6323,12 +6326,12 @@ done:
        if (*btf_log_buf && (err || always_log))
                fprintf(stderr, "\n%s", btf_log_buf);
 
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
-       if (prog_fd != -1)
+       if (prog_fd >= 0)
                close(prog_fd);
 
-       if (!IS_ERR(patched_linfo))
+       if (!libbpf_get_error(patched_linfo))
                free(patched_linfo);
 }
 
@@ -6839,9 +6842,9 @@ static void do_test_dedup(unsigned int test_num)
                return;
 
        test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+       err = libbpf_get_error(test_btf);
        free(raw_btf);
-       if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
-                 PTR_ERR(test_btf))) {
+       if (CHECK(err, "invalid test_btf errno:%d", err)) {
                err = -1;
                goto done;
        }
@@ -6853,9 +6856,9 @@ static void do_test_dedup(unsigned int test_num)
        if (!raw_btf)
                return;
        expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+       err = libbpf_get_error(expect_btf);
        free(raw_btf);
-       if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
-                 PTR_ERR(expect_btf))) {
+       if (CHECK(err, "invalid expect_btf errno:%d", err)) {
                err = -1;
                goto done;
        }
@@ -6966,10 +6969,8 @@ static void do_test_dedup(unsigned int test_num)
        }
 
 done:
-       if (!IS_ERR(test_btf))
-               btf__free(test_btf);
-       if (!IS_ERR(expect_btf))
-               btf__free(expect_btf);
+       btf__free(test_btf);
+       btf__free(expect_btf);
 }
 
 void test_btf(void)
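
The ``== -1`` to ``< 0`` conversions throughout this file are not cosmetic:
with LIBBPF_STRICT_DIRECT_ERRS, low-level wrappers may return the negative
Exx code itself rather than -1, so only ``fd < 0`` is correct in both modes.
A minimal sketch (raw_btf/raw_btf_size are assumed to hold a valid raw BTF
blob):

    #include <errno.h>
    #include <bpf/bpf.h>

    static int load_btf_fd(void *raw_btf, unsigned int raw_btf_size)
    {
            char log_buf[4096] = {};
            int fd;

            fd = bpf_load_btf(raw_btf, raw_btf_size, log_buf,
                              sizeof(log_buf), false);
            /* legacy: -1 with errno set; strict: -Exx directly.
             * fd < 0 covers both; fd == -1 would miss strict-mode errors.
             */
            if (fd < 0)
                    return -errno;
            return fd;
    }
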
index 5e129dc..1b90e68 100644 (file)
@@ -32,8 +32,9 @@ static int btf_dump_all_types(const struct btf *btf,
        int err = 0, id;
 
        d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
-       if (IS_ERR(d))
-               return PTR_ERR(d);
+       err = libbpf_get_error(d);
+       if (err)
+               return err;
 
        for (id = 1; id <= type_cnt; id++) {
                err = btf_dump__dump_type(d, id);
@@ -56,8 +57,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
        snprintf(test_file, sizeof(test_file), "%s.o", t->file);
 
        btf = btf__parse_elf(test_file, NULL);
-       if (CHECK(IS_ERR(btf), "btf_parse_elf",
-           "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+       if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) {
                err = -PTR_ERR(btf);
                btf = NULL;
                goto done;
index f36da15..022c7d8 100644 (file)
@@ -4,8 +4,6 @@
 #include <bpf/btf.h>
 #include "btf_helpers.h"
 
-static int duration = 0;
-
 void test_btf_write() {
        const struct btf_var_secinfo *vi;
        const struct btf_type *t;
@@ -16,7 +14,7 @@ void test_btf_write() {
        int id, err, str_off;
 
        btf = btf__new_empty();
-       if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+       if (!ASSERT_OK_PTR(btf, "new_empty"))
                return;
 
        str_off = btf__find_str(btf, "int");
index 643dfa3..876be0e 100644 (file)
@@ -102,8 +102,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
         */
        parent_link = bpf_program__attach_cgroup(obj->progs.egress,
                                                 parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
-                 "err %ld", PTR_ERR(parent_link)))
+       if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -126,8 +125,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
         */
        child_link = bpf_program__attach_cgroup(obj->progs.egress,
                                                child_cgroup_fd);
-       if (CHECK(IS_ERR(child_link), "child-cg-attach",
-                 "err %ld", PTR_ERR(child_link)))
+       if (!ASSERT_OK_PTR(child_link, "child-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -147,10 +145,8 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
                goto close_bpf_object;
 
 close_bpf_object:
-       if (!IS_ERR(parent_link))
-               bpf_link__destroy(parent_link);
-       if (!IS_ERR(child_link))
-               bpf_link__destroy(child_link);
+       bpf_link__destroy(parent_link);
+       bpf_link__destroy(child_link);
 
        cg_storage_multi_egress_only__destroy(obj);
 }
@@ -176,18 +172,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
         */
        parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
-                 "err %ld", PTR_ERR(parent_egress1_link)))
+       if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
                goto close_bpf_object;
        parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
-                 "err %ld", PTR_ERR(parent_egress2_link)))
+       if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
                goto close_bpf_object;
        parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
-                 "err %ld", PTR_ERR(parent_ingress_link)))
+       if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -221,18 +214,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
         */
        child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
-                 "err %ld", PTR_ERR(child_egress1_link)))
+       if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
                goto close_bpf_object;
        child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
-                 "err %ld", PTR_ERR(child_egress2_link)))
+       if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
                goto close_bpf_object;
        child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
-                 "err %ld", PTR_ERR(child_ingress_link)))
+       if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -264,18 +254,12 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
                goto close_bpf_object;
 
 close_bpf_object:
-       if (!IS_ERR(parent_egress1_link))
-               bpf_link__destroy(parent_egress1_link);
-       if (!IS_ERR(parent_egress2_link))
-               bpf_link__destroy(parent_egress2_link);
-       if (!IS_ERR(parent_ingress_link))
-               bpf_link__destroy(parent_ingress_link);
-       if (!IS_ERR(child_egress1_link))
-               bpf_link__destroy(child_egress1_link);
-       if (!IS_ERR(child_egress2_link))
-               bpf_link__destroy(child_egress2_link);
-       if (!IS_ERR(child_ingress_link))
-               bpf_link__destroy(child_ingress_link);
+       bpf_link__destroy(parent_egress1_link);
+       bpf_link__destroy(parent_egress2_link);
+       bpf_link__destroy(parent_ingress_link);
+       bpf_link__destroy(child_egress1_link);
+       bpf_link__destroy(child_egress2_link);
+       bpf_link__destroy(child_ingress_link);
 
        cg_storage_multi_isolated__destroy(obj);
 }
@@ -301,18 +285,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
         */
        parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
-                 "err %ld", PTR_ERR(parent_egress1_link)))
+       if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
                goto close_bpf_object;
        parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
-                 "err %ld", PTR_ERR(parent_egress2_link)))
+       if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
                goto close_bpf_object;
        parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
-                 "err %ld", PTR_ERR(parent_ingress_link)))
+       if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -338,18 +319,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
         */
        child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
-                 "err %ld", PTR_ERR(child_egress1_link)))
+       if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
                goto close_bpf_object;
        child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
-                 "err %ld", PTR_ERR(child_egress2_link)))
+       if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
                goto close_bpf_object;
        child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
-                 "err %ld", PTR_ERR(child_ingress_link)))
+       if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -375,18 +353,12 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
                goto close_bpf_object;
 
 close_bpf_object:
-       if (!IS_ERR(parent_egress1_link))
-               bpf_link__destroy(parent_egress1_link);
-       if (!IS_ERR(parent_egress2_link))
-               bpf_link__destroy(parent_egress2_link);
-       if (!IS_ERR(parent_ingress_link))
-               bpf_link__destroy(parent_ingress_link);
-       if (!IS_ERR(child_egress1_link))
-               bpf_link__destroy(child_egress1_link);
-       if (!IS_ERR(child_egress2_link))
-               bpf_link__destroy(child_egress2_link);
-       if (!IS_ERR(child_ingress_link))
-               bpf_link__destroy(child_ingress_link);
+       bpf_link__destroy(parent_egress1_link);
+       bpf_link__destroy(parent_egress2_link);
+       bpf_link__destroy(parent_ingress_link);
+       bpf_link__destroy(child_egress1_link);
+       bpf_link__destroy(child_egress2_link);
+       bpf_link__destroy(child_ingress_link);
 
        cg_storage_multi_shared__destroy(obj);
 }
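
Dropping the IS_ERR() guards on the cleanup paths above is safe because
libbpf destructors such as bpf_link__destroy() and bpf_object__close() now
accept NULL (and, for legacy callers, ERR_PTR-encoded) pointers as no-ops,
mirroring free(). The cleanup idiom reduces to:

    struct bpf_link *link = bpf_program__attach_cgroup(prog, cgroup_fd);

    /* ... error handling and test body ... */

    /* no guard needed: destroying a NULL/ERR_PTR link is a no-op */
    bpf_link__destroy(link);
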
index 0a1fc98..20bb883 100644 (file)
@@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void)
        prog_cnt = 2;
        CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
                                  BPF_F_QUERY_EFFECTIVE, &attach_flags,
-                                 prog_ids, &prog_cnt) != -1);
+                                 prog_ids, &prog_cnt) >= 0);
        CHECK_FAIL(errno != ENOSPC);
        CHECK_FAIL(prog_cnt != 4);
        /* check that prog_ids are returned even when buffer is too small */
index 736796e..9091524 100644 (file)
@@ -65,8 +65,7 @@ void test_cgroup_link(void)
        for (i = 0; i < cg_nr; i++) {
                links[i] = bpf_program__attach_cgroup(skel->progs.egress,
                                                      cgs[i].fd);
-               if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
-                                i, PTR_ERR(links[i])))
+               if (!ASSERT_OK_PTR(links[i], "cg_attach"))
                        goto cleanup;
        }
 
@@ -121,8 +120,7 @@ void test_cgroup_link(void)
 
        links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
                                                    cgs[last_cg].fd);
-       if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
-                 PTR_ERR(links[last_cg])))
+       if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
                goto cleanup;
 
        ping_and_check(cg_nr + 1, 0);
@@ -147,7 +145,7 @@ void test_cgroup_link(void)
        /* attempt to mix in with multi-attach bpf_link */
        tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
                                              cgs[last_cg].fd);
-       if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+       if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) {
                bpf_link__destroy(tmp_link);
                goto cleanup;
        }
@@ -165,8 +163,7 @@ void test_cgroup_link(void)
        /* attach back link-based one */
        links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
                                                    cgs[last_cg].fd);
-       if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
-                 PTR_ERR(links[last_cg])))
+       if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
                goto cleanup;
 
        ping_and_check(cg_nr, 0);
@@ -249,8 +246,7 @@ cleanup:
                                 BPF_CGROUP_INET_EGRESS);
 
        for (i = 0; i < cg_nr; i++) {
-               if (!IS_ERR(links[i]))
-                       bpf_link__destroy(links[i]);
+               bpf_link__destroy(links[i]);
        }
        test_cgroup_link__destroy(skel);
 
index 464edc1..b9dc4ec 100644 (file)
@@ -60,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
                goto cleanup;
 
        link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
-       if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "cgroup_attach"))
                goto cleanup;
 
        run_lookup_test(&skel->bss->g_serv_port, out_sk);
index b62a393..012068f 100644 (file)
@@ -53,7 +53,7 @@ static void test_check_mtu_xdp_attach(void)
        prog = skel->progs.xdp_use_helper_basic;
 
        link = bpf_program__attach_xdp(prog, IFINDEX_LO);
-       if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_attach"))
                goto out;
        skel->links.xdp_use_helper_basic = link;
 
index 6077108..d02e064 100644 (file)
@@ -369,8 +369,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
        const char *name;
        int i;
 
-       if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
-           CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+       if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) {
                btf__free(local_btf);
                btf__free(targ_btf);
                return -EINVAL;
@@ -848,8 +847,7 @@ void test_core_reloc(void)
                }
 
                obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
-               if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
-                         test_case->bpf_obj_file, PTR_ERR(obj)))
+               if (!ASSERT_OK_PTR(obj, "obj_open"))
                        continue;
 
                probe_name = "raw_tracepoint/sys_enter";
@@ -899,8 +897,7 @@ void test_core_reloc(void)
                data->my_pid_tgid = my_pid_tgid;
 
                link = bpf_program__attach_raw_tracepoint(prog, tp_name);
-               if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
-                         PTR_ERR(link)))
+               if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                        goto cleanup;
 
                /* trigger test run */
@@ -941,10 +938,8 @@ cleanup:
                        CHECK_FAIL(munmap(mmap_data, mmap_sz));
                        mmap_data = NULL;
                }
-               if (!IS_ERR_OR_NULL(link)) {
-                       bpf_link__destroy(link);
-                       link = NULL;
-               }
+               bpf_link__destroy(link);
+               link = NULL;
                bpf_object__close(obj);
        }
 }
index 6399084..73b4c76 100644 (file)
@@ -146,10 +146,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 
 close_prog:
        for (i = 0; i < prog_cnt; i++)
-               if (!IS_ERR_OR_NULL(link[i]))
-                       bpf_link__destroy(link[i]);
-       if (!IS_ERR_OR_NULL(obj))
-               bpf_object__close(obj);
+               bpf_link__destroy(link[i]);
+       bpf_object__close(obj);
        bpf_object__close(tgt_obj);
        free(link);
        free(prog);
@@ -231,7 +229,7 @@ static int test_second_attach(struct bpf_object *obj)
                return err;
 
        link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
-       if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+       if (!ASSERT_OK_PTR(link, "second_link"))
                goto out;
 
        err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
@@ -283,9 +281,7 @@ static void test_fmod_ret_freplace(void)
        opts.attach_prog_fd = pkt_fd;
 
        freplace_obj = bpf_object__open_file(freplace_name, &opts);
-       if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
-                 "failed to open %s: %ld\n", freplace_name,
-                 PTR_ERR(freplace_obj)))
+       if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
                goto out;
 
        err = bpf_object__load(freplace_obj);
@@ -294,14 +290,12 @@ static void test_fmod_ret_freplace(void)
 
        prog = bpf_program__next(NULL, freplace_obj);
        freplace_link = bpf_program__attach_trace(prog);
-       if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+       if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
                goto out;
 
        opts.attach_prog_fd = bpf_program__fd(prog);
        fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
-       if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
-                 "failed to open %s: %ld\n", fmod_ret_name,
-                 PTR_ERR(fmod_obj)))
+       if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
                goto out;
 
        err = bpf_object__load(fmod_obj);
@@ -350,9 +344,7 @@ static void test_obj_load_failure_common(const char *obj_file,
                           );
 
        obj = bpf_object__open_file(obj_file, &opts);
-       if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
-                 "failed to open %s: %ld\n", obj_file,
-                 PTR_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open"))
                goto close_prog;
 
        /* It should fail to load the program */
@@ -361,8 +353,7 @@ static void test_obj_load_failure_common(const char *obj_file,
                goto close_prog;
 
 close_prog:
-       if (!IS_ERR_OR_NULL(obj))
-               bpf_object__close(obj);
+       bpf_object__close(obj);
        bpf_object__close(pkt_obj);
 }
 
index cd6dc80..225714f 100644 (file)
@@ -541,7 +541,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
                return;
 
        link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
-       if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_netns"))
                goto out_close;
 
        run_tests_skb_less(tap_fd, skel->maps.last_dissection);
index 172c586..3931ede 100644 (file)
@@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2)
        /* Expect failure creating link when another link exists */
        errno = 0;
        link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
-       if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+       if (CHECK_FAIL(link2 >= 0 || errno != E2BIG))
                perror("bpf_prog_attach(prog2) expected E2BIG");
-       if (link2 != -1)
+       if (link2 >= 0)
                close(link2);
        CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
 
@@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2)
        /* Expect failure creating link when prog attached */
        errno = 0;
        link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
-       if (CHECK_FAIL(link != -1 || errno != EEXIST))
+       if (CHECK_FAIL(link >= 0 || errno != EEXIST))
                perror("bpf_link_create(prog2) expected EEXIST");
-       if (link != -1)
+       if (link >= 0)
                close(link);
        CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
 
@@ -623,7 +623,7 @@ static void run_tests(int netns)
        }
 out_close:
        for (i = 0; i < ARRAY_SIZE(progs); i++) {
-               if (progs[i] != -1)
+               if (progs[i] >= 0)
                        CHECK_FAIL(close(progs[i]));
        }
 }
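Alongside the pointer checks, raw fd tests move from comparisons against -1 to sign checks: once wrappers like bpf_link_create() may return any negative errno-style value rather than exactly -1, testing for a negative result is the robust failure check. A short sketch under that assumption (try_flow_link() is illustrative only):

    #include <unistd.h>
    #include <bpf/bpf.h>

    static int try_flow_link(int prog_fd, int netns_fd,
                             const struct bpf_link_create_opts *opts)
    {
            int link_fd = bpf_link_create(prog_fd, netns_fd,
                                          BPF_FLOW_DISSECTOR, opts);

            if (link_fd < 0)        /* catches -1 and -EINVAL/-E2BIG alike */
                    return link_fd;

            close(link_fd);
            return 0;
    }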
index 9257222..522237a 100644 (file)
@@ -121,12 +121,12 @@ void test_get_stack_raw_tp(void)
                goto close_prog;
 
        link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
-       if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                goto close_prog;
 
        pb_opts.sample_cb = get_stack_print_output;
        pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
-       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+       if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto close_prog;
 
        /* trigger some syscall action */
@@ -141,9 +141,7 @@ void test_get_stack_raw_tp(void)
        }
 
 close_prog:
-       if (!IS_ERR_OR_NULL(link))
-               bpf_link__destroy(link);
-       if (!IS_ERR_OR_NULL(pb))
-               perf_buffer__free(pb);
+       bpf_link__destroy(link);
+       perf_buffer__free(pb);
        bpf_object__close(obj);
 }
index d884b2e..8d5a602 100644 (file)
@@ -48,8 +48,7 @@ void test_get_stackid_cannot_attach(void)
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
-             "should have failed\n");
+       ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain");
        close(pmu_fd);
 
        /* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
@@ -65,8 +64,7 @@ void test_get_stackid_cannot_attach(void)
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
-             "err: %ld\n", PTR_ERR(skel->links.oncpu));
+       ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
        close(pmu_fd);
 
        /* add exclude_callchain_kernel, attach should fail */
@@ -82,8 +80,7 @@ void test_get_stackid_cannot_attach(void)
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
-             "should have failed\n");
+       ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel");
        close(pmu_fd);
 
 cleanup:
index 428d488..4747ab1 100644 (file)
@@ -48,8 +48,7 @@ static void test_hashmap_generic(void)
        struct hashmap *map;
 
        map = hashmap__new(hash_fn, equal_fn, NULL);
-       if (CHECK(IS_ERR(map), "hashmap__new",
-                 "failed to create map: %ld\n", PTR_ERR(map)))
+       if (!ASSERT_OK_PTR(map, "hashmap__new"))
                return;
 
        for (i = 0; i < ELEM_CNT; i++) {
@@ -267,8 +266,7 @@ static void test_hashmap_multimap(void)
 
        /* force collisions */
        map = hashmap__new(collision_hash_fn, equal_fn, NULL);
-       if (CHECK(IS_ERR(map), "hashmap__new",
-                 "failed to create map: %ld\n", PTR_ERR(map)))
+       if (!ASSERT_OK_PTR(map, "hashmap__new"))
                return;
 
        /* set up multimap:
@@ -339,8 +337,7 @@ static void test_hashmap_empty()
 
        /* force collisions */
        map = hashmap__new(hash_fn, equal_fn, NULL);
-       if (CHECK(IS_ERR(map), "hashmap__new",
-                 "failed to create map: %ld\n", PTR_ERR(map)))
+       if (!ASSERT_OK_PTR(map, "hashmap__new"))
                goto cleanup;
 
        if (CHECK(hashmap__size(map) != 0, "hashmap__size",
index d651079..ddfb6bf 100644 (file)
@@ -97,15 +97,13 @@ void test_kfree_skb(void)
                goto close_prog;
 
        link = bpf_program__attach_raw_tracepoint(prog, NULL);
-       if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                goto close_prog;
        link_fentry = bpf_program__attach_trace(fentry);
-       if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
-                 PTR_ERR(link_fentry)))
+       if (!ASSERT_OK_PTR(link_fentry, "attach fentry"))
                goto close_prog;
        link_fexit = bpf_program__attach_trace(fexit);
-       if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
-                 PTR_ERR(link_fexit)))
+       if (!ASSERT_OK_PTR(link_fexit, "attach fexit"))
                goto close_prog;
 
        perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
@@ -116,7 +114,7 @@ void test_kfree_skb(void)
        pb_opts.sample_cb = on_sample;
        pb_opts.ctx = &passed;
        pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
-       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+       if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto close_prog;
 
        memcpy(skb.cb, &cb, sizeof(cb));
@@ -144,12 +142,9 @@ void test_kfree_skb(void)
        CHECK_FAIL(!test_ok[0] || !test_ok[1]);
 close_prog:
        perf_buffer__free(pb);
-       if (!IS_ERR_OR_NULL(link))
-               bpf_link__destroy(link);
-       if (!IS_ERR_OR_NULL(link_fentry))
-               bpf_link__destroy(link_fentry);
-       if (!IS_ERR_OR_NULL(link_fexit))
-               bpf_link__destroy(link_fexit);
+       bpf_link__destroy(link);
+       bpf_link__destroy(link_fentry);
+       bpf_link__destroy(link_fexit);
        bpf_object__close(obj);
        bpf_object__close(obj2);
 }
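perf_buffer__new() follows the same scheme in these hunks: validate the returned pointer once, then free unconditionally, on the assumption (implied by the guard removal above) that perf_buffer__free() accepts NULL/error pointers. A hedged sketch, with consume_once() and the page count invented for illustration:

    #include <bpf/libbpf.h>

    static void on_sample(void *ctx, int cpu, void *data, __u32 size)
    {
            /* handle one record */
    }

    static int consume_once(int perf_map_fd)
    {
            struct perf_buffer_opts pb_opts = { .sample_cb = on_sample };
            struct perf_buffer *pb;
            int err = 0;

            pb = perf_buffer__new(perf_map_fd, 8 /* pages per CPU */, &pb_opts);
            if (libbpf_get_error(pb)) {
                    err = -1;
                    goto out;
            }

            err = perf_buffer__poll(pb, 100 /* timeout, ms */);
    out:
            perf_buffer__free(pb);  /* assumed safe on NULL/error pointers */
            return err < 0 ? -1 : 0;
    }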
index b58b775..67bebd3 100644 (file)
@@ -87,8 +87,7 @@ void test_ksyms_btf(void)
        struct btf *btf;
 
        btf = libbpf_find_kernel_btf();
-       if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
-                 PTR_ERR(btf)))
+       if (!ASSERT_OK_PTR(btf, "btf_exists"))
                return;
 
        percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
index a743288..6fc97c4 100644 (file)
@@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
        int err, i;
 
        link = bpf_program__attach(prog);
-       if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_attach"))
                goto cleanup;
 
        bss->in = 1;
@@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
 
        /* re-open link from BPFFS */
        link = bpf_link__open(link_pin_path);
-       if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_open"))
                goto cleanup;
 
        CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
@@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
        CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
 
 cleanup:
-       if (!IS_ERR(link))
-               bpf_link__destroy(link);
+       bpf_link__destroy(link);
 }
 
 void test_link_pinning(void)
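For context, the pin/re-open round trip this test drives looks roughly as follows; the BPFFS path is made up and error handling is reduced to libbpf_get_error() checks:

    #include <bpf/libbpf.h>

    static int pin_cycle(struct bpf_program *prog)
    {
            const char *path = "/sys/fs/bpf/example_link";  /* illustrative */
            struct bpf_link *link;
            int err;

            link = bpf_program__attach(prog);
            if (libbpf_get_error(link))
                    return -1;

            err = bpf_link__pin(link, path);        /* persist in BPFFS */
            bpf_link__destroy(link);                /* the pin keeps it alive */
            if (err)
                    return err;

            link = bpf_link__open(path);            /* reacquire from BPFFS */
            if (libbpf_get_error(link))
                    return -1;

            bpf_link__unpin(link);
            bpf_link__destroy(link);
            return 0;
    }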
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644 (file)
index 0000000..beebfa9
--- /dev/null
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+       __u64 key, value = START_VALUE;
+       int err;
+
+       for (key = 1; key < MAX_ENTRIES + 1; key++) {
+               err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+               if (!ASSERT_OK(err, "bpf_map_update_elem"))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+       __u64 key, value[nr_cpus];
+       int i, err;
+
+       for (i = 0; i < nr_cpus; i++)
+               value[i] = START_VALUE;
+
+       for (key = 1; key < MAX_ENTRIES + 1; key++) {
+               err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+               if (!ASSERT_OK(err, "bpf_map_update_elem"))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type,
+                                                int *map_fd)
+{
+       struct test_lookup_and_delete *skel;
+       int err;
+
+       skel = test_lookup_and_delete__open();
+       if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open"))
+               return NULL;
+
+       err = bpf_map__set_type(skel->maps.hash_map, map_type);
+       if (!ASSERT_OK(err, "bpf_map__set_type"))
+               goto cleanup;
+
+       err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES);
+       if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+               goto cleanup;
+
+       err = test_lookup_and_delete__load(skel);
+       if (!ASSERT_OK(err, "test_lookup_and_delete__load"))
+               goto cleanup;
+
+       *map_fd = bpf_map__fd(skel->maps.hash_map);
+       if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd"))
+               goto cleanup;
+
+       return skel;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+       return NULL;
+}
+
+/* Triggers BPF program that updates map with given key and value */
+static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key,
+                     __u64 value)
+{
+       int err;
+
+       skel->bss->set_pid = getpid();
+       skel->bss->set_key = key;
+       skel->bss->set_value = value;
+
+       err = test_lookup_and_delete__attach(skel);
+       if (!ASSERT_OK(err, "test_lookup_and_delete__attach"))
+               return -1;
+
+       syscall(__NR_getpgid);
+
+       test_lookup_and_delete__detach(skel);
+
+       return 0;
+}
+
+static void test_lookup_and_delete_hash(void)
+{
+       struct test_lookup_and_delete *skel;
+       __u64 key, value;
+       int map_fd, err;
+
+       /* Setup program and fill the map. */
+       skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd);
+       if (!ASSERT_OK_PTR(skel, "setup_prog"))
+               return;
+
+       err = fill_values(map_fd);
+       if (!ASSERT_OK(err, "fill_values"))
+               goto cleanup;
+
+       /* Lookup and delete element. */
+       key = 1;
+       err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+               goto cleanup;
+
+       /* Fetched value should match the initially set value. */
+       if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem",
+                 "unexpected value=%lld\n", value))
+               goto cleanup;
+
+       /* Check that the entry is nonexistent. */
+       err = bpf_map_lookup_elem(map_fd, &key, &value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_percpu_hash(void)
+{
+       struct test_lookup_and_delete *skel;
+       __u64 key, val, value[nr_cpus];
+       int map_fd, err, i;
+
+       /* Setup program and fill the map. */
+       skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd);
+       if (!ASSERT_OK_PTR(skel, "setup_prog"))
+               return;
+
+       err = fill_values_percpu(map_fd);
+       if (!ASSERT_OK(err, "fill_values_percpu"))
+               goto cleanup;
+
+       /* Lookup and delete element. */
+       key = 1;
+       err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+               goto cleanup;
+
+       for (i = 0; i < nr_cpus; i++) {
+               val = value[i];
+
+               /* Fetched value should match the initially set value. */
+               if (CHECK(val != START_VALUE, "map value",
+                         "unexpected for cpu %d: %lld\n", i, val))
+                       goto cleanup;
+       }
+
+       /* Check that the entry is nonexistent. */
+       err = bpf_map_lookup_elem(map_fd, &key, value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_hash(void)
+{
+       struct test_lookup_and_delete *skel;
+       __u64 key, value;
+       int map_fd, err;
+
+       /* Setup program and fill the LRU map. */
+       skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd);
+       if (!ASSERT_OK_PTR(skel, "setup_prog"))
+               return;
+
+       err = fill_values(map_fd);
+       if (!ASSERT_OK(err, "fill_values"))
+               goto cleanup;
+
+       /* Insert new element at key=3, should reuse LRU element. */
+       key = 3;
+       err = trigger_tp(skel, key, NEW_VALUE);
+       if (!ASSERT_OK(err, "trigger_tp"))
+               goto cleanup;
+
+       /* Lookup and delete element 3. */
+       err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+               goto cleanup;
+
+       /* Value should match the new value. */
+       if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+                 "unexpected value=%lld\n", value))
+               goto cleanup;
+
+       /* Check that entries 3 and 1 are nonexistent. */
+       err = bpf_map_lookup_elem(map_fd, &key, &value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+       key = 1;
+       err = bpf_map_lookup_elem(map_fd, &key, &value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_percpu_hash(void)
+{
+       struct test_lookup_and_delete *skel;
+       __u64 key, val, value[nr_cpus];
+       int map_fd, err, i, cpucnt = 0;
+
+       /* Setup program and fill the LRU map. */
+       skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd);
+       if (!ASSERT_OK_PTR(skel, "setup_prog"))
+               return;
+
+       err = fill_values_percpu(map_fd);
+       if (!ASSERT_OK(err, "fill_values_percpu"))
+               goto cleanup;
+
+       /* Insert new element at key=3, should reuse LRU element 1. */
+       key = 3;
+       err = trigger_tp(skel, key, NEW_VALUE);
+       if (!ASSERT_OK(err, "trigger_tp"))
+               goto cleanup;
+
+       /* Clean value. */
+       for (i = 0; i < nr_cpus; i++)
+               value[i] = 0;
+
+       /* Lookup and delete element 3. */
+       err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+               goto cleanup;
+
+       /* Check if only one CPU has set the value. */
+       for (i = 0; i < nr_cpus; i++) {
+               val = value[i];
+               if (val) {
+                       if (CHECK(val != NEW_VALUE, "map value",
+                                 "unexpected for cpu %d: %lld\n", i, val))
+                               goto cleanup;
+                       cpucnt++;
+               }
+       }
+       if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+                 cpucnt))
+               goto cleanup;
+
+       /* Check that entries 3 and 1 are nonexistent. */
+       err = bpf_map_lookup_elem(map_fd, &key, value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+       key = 1;
+       err = bpf_map_lookup_elem(map_fd, &key, value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+}
+
+void test_lookup_and_delete(void)
+{
+       nr_cpus = bpf_num_possible_cpus();
+
+       if (test__start_subtest("lookup_and_delete"))
+               test_lookup_and_delete_hash();
+       if (test__start_subtest("lookup_and_delete_percpu"))
+               test_lookup_and_delete_percpu_hash();
+       if (test__start_subtest("lookup_and_delete_lru"))
+               test_lookup_and_delete_lru_hash();
+       if (test__start_subtest("lookup_and_delete_lru_percpu"))
+               test_lookup_and_delete_lru_percpu_hash();
+}
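The BPF object behind setup_prog() is not shown in this section. Inferred from the skeleton references above (set_pid/set_key/set_value in bss, the retypable hash_map, the getpgid trigger), it presumably looks something like the sketch below; treat everything beyond those names as assumption:

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    __u32 set_pid = 0;
    __u64 set_key = 0;
    __u64 set_value = 0;

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);  /* retyped via bpf_map__set_type() */
            __uint(max_entries, 2);
            __type(key, __u64);
            __type(value, __u64);
    } hash_map SEC(".maps");

    SEC("tp/syscalls/sys_enter_getpgid")
    int bpf_lookup_and_delete_test(const void *ctx)
    {
            if (set_pid == (__u32)(bpf_get_current_pid_tgid() >> 32))
                    bpf_map_update_elem(&hash_map, &set_key, &set_value,
                                        BPF_NOEXIST);
            return 0;
    }

    char _license[] SEC("license") = "GPL";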
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
new file mode 100644 (file)
index 0000000..59adb47
--- /dev/null
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ *   1. call listen() for 4 server sockets.
+ *   2. call connect() for 25 client sockets.
+ *   3. call listen() for 1 server socket. (migration target)
+ *   4. update a map to migrate all child sockets
+ *        to the last server socket (migrate_map[cookie] = 4)
+ *   5. call shutdown() for the first 4 server sockets
+ *        and migrate the requests in the accept queue
+ *        to the last server socket.
+ *   6. call listen() for the second server socket.
+ *   7. call shutdown() for the last server
+ *        and migrate the requests in the accept queue
+ *        to the second server socket.
+ *   8. call listen() for the last server.
+ *   9. call shutdown() for the second server
+ *        and migrate the requests in the accept queue
+ *        to the last server socket.
+ *  10. call accept() for the last server socket.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "test_migrate_reuseport.skel.h"
+#include "network_helpers.h"
+
+#ifndef TCP_FASTOPEN_CONNECT
+#define TCP_FASTOPEN_CONNECT 30
+#endif
+
+#define IFINDEX_LO 1
+
+#define NR_SERVERS 5
+#define NR_CLIENTS (NR_SERVERS * 5)
+#define MIGRATED_TO (NR_SERVERS - 1)
+
+/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
+#define QLEN (NR_CLIENTS * 5)
+
+#define MSG "Hello World\0"
+#define MSGLEN 12
+
+static struct migrate_reuseport_test_case {
+       const char *name;
+       __s64 servers[NR_SERVERS];
+       __s64 clients[NR_CLIENTS];
+       struct sockaddr_storage addr;
+       socklen_t addrlen;
+       int family;
+       int state;
+       bool drop_ack;
+       bool expire_synack_timer;
+       bool fastopen;
+       struct bpf_link *link;
+} test_cases[] = {
+       {
+               .name = "IPv4 TCP_ESTABLISHED  inet_csk_listen_stop",
+               .family = AF_INET,
+               .state = BPF_TCP_ESTABLISHED,
+               .drop_ack = false,
+               .expire_synack_timer = false,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv4 TCP_SYN_RECV     inet_csk_listen_stop",
+               .family = AF_INET,
+               .state = BPF_TCP_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = false,
+               .fastopen = true,
+       },
+       {
+               .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
+               .family = AF_INET,
+               .state = BPF_TCP_NEW_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = true,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+               .family = AF_INET,
+               .state = BPF_TCP_NEW_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = false,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv6 TCP_ESTABLISHED  inet_csk_listen_stop",
+               .family = AF_INET6,
+               .state = BPF_TCP_ESTABLISHED,
+               .drop_ack = false,
+               .expire_synack_timer = false,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv6 TCP_SYN_RECV     inet_csk_listen_stop",
+               .family = AF_INET6,
+               .state = BPF_TCP_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = false,
+               .fastopen = true,
+       },
+       {
+               .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
+               .family = AF_INET6,
+               .state = BPF_TCP_NEW_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = true,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+               .family = AF_INET6,
+               .state = BPF_TCP_NEW_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = false,
+               .fastopen = false,
+       }
+};
+
+static void init_fds(__s64 fds[], int len)
+{
+       int i;
+
+       for (i = 0; i < len; i++)
+               fds[i] = -1;
+}
+
+static void close_fds(__s64 fds[], int len)
+{
+       int i;
+
+       for (i = 0; i < len; i++) {
+               if (fds[i] != -1) {
+                       close(fds[i]);
+                       fds[i] = -1;
+               }
+       }
+}
+
+static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
+{
+       int err = 0, fd, len;
+
+       fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+       if (!ASSERT_NEQ(fd, -1, "open"))
+               return -1;
+
+       if (restore) {
+               len = write(fd, buf, *saved_len);
+               if (!ASSERT_EQ(len, *saved_len, "write - restore"))
+                       err = -1;
+       } else {
+               *saved_len = read(fd, buf, size);
+               if (!ASSERT_GE(*saved_len, 1, "read")) {
+                       err = -1;
+                       goto close;
+               }
+
+               err = lseek(fd, 0, SEEK_SET);
+               if (!ASSERT_OK(err, "lseek"))
+                       goto close;
+
+               /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
+                *  TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
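+                *  i.e. 0x1 | 0x2 | 0x4 | 0x200 = 519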
+                */
+               len = write(fd, "519", 3);
+               if (!ASSERT_EQ(len, 3, "write - setup"))
+                       err = -1;
+       }
+
+close:
+       close(fd);
+
+       return err;
+}
+
+static int drop_ack(struct migrate_reuseport_test_case *test_case,
+                   struct test_migrate_reuseport *skel)
+{
+       if (test_case->family == AF_INET)
+               skel->bss->server_port = ((struct sockaddr_in *)
+                                         &test_case->addr)->sin_port;
+       else
+               skel->bss->server_port = ((struct sockaddr_in6 *)
+                                         &test_case->addr)->sin6_port;
+
+       test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
+                                                 IFINDEX_LO);
+       if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
+               return -1;
+
+       return 0;
+}
+
+static int pass_ack(struct migrate_reuseport_test_case *test_case)
+{
+       int err;
+
+       err = bpf_link__detach(test_case->link);
+       if (!ASSERT_OK(err, "bpf_link__detach"))
+               return -1;
+
+       test_case->link = NULL;
+
+       return 0;
+}
+
+static int start_servers(struct migrate_reuseport_test_case *test_case,
+                        struct test_migrate_reuseport *skel)
+{
+       int i, err, prog_fd, reuseport = 1, qlen = QLEN;
+
+       prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
+
+       make_sockaddr(test_case->family,
+                     test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
+                     &test_case->addr, &test_case->addrlen);
+
+       for (i = 0; i < NR_SERVERS; i++) {
+               test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
+                                              IPPROTO_TCP);
+               if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
+                       return -1;
+
+               err = setsockopt(test_case->servers[i], SOL_SOCKET,
+                                SO_REUSEPORT, &reuseport, sizeof(reuseport));
+               if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
+                       return -1;
+
+               err = bind(test_case->servers[i],
+                          (struct sockaddr *)&test_case->addr,
+                          test_case->addrlen);
+               if (!ASSERT_OK(err, "bind"))
+                       return -1;
+
+               if (i == 0) {
+                       err = setsockopt(test_case->servers[i], SOL_SOCKET,
+                                        SO_ATTACH_REUSEPORT_EBPF,
+                                        &prog_fd, sizeof(prog_fd));
+                       if (!ASSERT_OK(err,
+                                      "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
+                               return -1;
+
+                       err = getsockname(test_case->servers[i],
+                                         (struct sockaddr *)&test_case->addr,
+                                         &test_case->addrlen);
+                       if (!ASSERT_OK(err, "getsockname"))
+                               return -1;
+               }
+
+               if (test_case->fastopen) {
+                       err = setsockopt(test_case->servers[i],
+                                        SOL_TCP, TCP_FASTOPEN,
+                                        &qlen, sizeof(qlen));
+                       if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
+                               return -1;
+               }
+
+               /* All requests will be tied to the first four listeners */
+               if (i != MIGRATED_TO) {
+                       err = listen(test_case->servers[i], qlen);
+                       if (!ASSERT_OK(err, "listen"))
+                               return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int start_clients(struct migrate_reuseport_test_case *test_case)
+{
+       char buf[MSGLEN] = MSG;
+       int i, err;
+
+       for (i = 0; i < NR_CLIENTS; i++) {
+               test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
+                                              IPPROTO_TCP);
+               if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
+                       return -1;
+
+               /* The attached XDP program drops only the final ACK, so
+                * clients will transition to TCP_ESTABLISHED immediately.
+                */
+               err = settimeo(test_case->clients[i], 100);
+               if (!ASSERT_OK(err, "settimeo"))
+                       return -1;
+
+               if (test_case->fastopen) {
+                       int fastopen = 1;
+
+                       err = setsockopt(test_case->clients[i], IPPROTO_TCP,
+                                        TCP_FASTOPEN_CONNECT, &fastopen,
+                                        sizeof(fastopen));
+                       if (!ASSERT_OK(err,
+                                      "setsockopt - TCP_FASTOPEN_CONNECT"))
+                               return -1;
+               }
+
+               err = connect(test_case->clients[i],
+                             (struct sockaddr *)&test_case->addr,
+                             test_case->addrlen);
+               if (!ASSERT_OK(err, "connect"))
+                       return -1;
+
+               err = write(test_case->clients[i], buf, MSGLEN);
+               if (!ASSERT_EQ(err, MSGLEN, "write"))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int update_maps(struct migrate_reuseport_test_case *test_case,
+                      struct test_migrate_reuseport *skel)
+{
+       int i, err, migrated_to = MIGRATED_TO;
+       int reuseport_map_fd, migrate_map_fd;
+       __u64 value;
+
+       reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
+       migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
+
+       for (i = 0; i < NR_SERVERS; i++) {
+               value = (__u64)test_case->servers[i];
+               err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
+                                         BPF_NOEXIST);
+               if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
+                       return -1;
+
+               err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
+               if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
+                       return -1;
+
+               err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
+                                         BPF_NOEXIST);
+               if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int migrate_dance(struct migrate_reuseport_test_case *test_case)
+{
+       int i, err;
+
+       /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
+        * to the last listener based on eBPF.
+        */
+       for (i = 0; i < MIGRATED_TO; i++) {
+               err = shutdown(test_case->servers[i], SHUT_RDWR);
+               if (!ASSERT_OK(err, "shutdown"))
+                       return -1;
+       }
+
+       /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
+       if (test_case->state == BPF_TCP_NEW_SYN_RECV)
+               return 0;
+
+       /* Note that we use the second listener instead of the
+        * first one here.
+        *
+        * The first listener is bind()ed with port 0, and
+        * SOCK_BINDPORT_LOCK is not set in sk_userlocks, so
+        * calling listen() again will bind() the first listener
+        * on a new ephemeral port and detach it from the existing
+        * reuseport group.  (See: __inet_bind(), tcp_set_state())
+        *
+        * OTOH, the second one is bind()ed with a specific port,
+        * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
+        * resurrect the listener on the existing reuseport group.
+        */
+       err = listen(test_case->servers[1], QLEN);
+       if (!ASSERT_OK(err, "listen"))
+               return -1;
+
+       /* Migrate from the last listener to the second one.
+        *
+        * All listeners were detached out of the reuseport_map,
+        * so migration will be done by kernel random pick from here.
+        */
+       err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
+       if (!ASSERT_OK(err, "shutdown"))
+               return -1;
+
+       /* Back to the existing reuseport group */
+       err = listen(test_case->servers[MIGRATED_TO], QLEN);
+       if (!ASSERT_OK(err, "listen"))
+               return -1;
+
+       /* Migrate back to the last one from the second one */
+       err = shutdown(test_case->servers[1], SHUT_RDWR);
+       if (!ASSERT_OK(err, "shutdown"))
+               return -1;
+
+       return 0;
+}
+
+static void count_requests(struct migrate_reuseport_test_case *test_case,
+                          struct test_migrate_reuseport *skel)
+{
+       struct sockaddr_storage addr;
+       socklen_t len = sizeof(addr);
+       int err, cnt = 0, client;
+       char buf[MSGLEN];
+
+       err = settimeo(test_case->servers[MIGRATED_TO], 4000);
+       if (!ASSERT_OK(err, "settimeo"))
+               goto out;
+
+       for (; cnt < NR_CLIENTS; cnt++) {
+               client = accept(test_case->servers[MIGRATED_TO],
+                               (struct sockaddr *)&addr, &len);
+               if (!ASSERT_NEQ(client, -1, "accept"))
+                       goto out;
+
+               memset(buf, 0, MSGLEN);
+               read(client, buf, MSGLEN);
+               close(client);
+
+               if (!ASSERT_STREQ(buf, MSG, "read"))
+                       goto out;
+       }
+
+out:
+       ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
+
+       switch (test_case->state) {
+       case BPF_TCP_ESTABLISHED:
+               cnt = skel->bss->migrated_at_close;
+               break;
+       case BPF_TCP_SYN_RECV:
+               cnt = skel->bss->migrated_at_close_fastopen;
+               break;
+       case BPF_TCP_NEW_SYN_RECV:
+               if (test_case->expire_synack_timer)
+                       cnt = skel->bss->migrated_at_send_synack;
+               else
+                       cnt = skel->bss->migrated_at_recv_ack;
+               break;
+       default:
+               cnt = 0;
+       }
+
+       ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
+}
+
+static void run_test(struct migrate_reuseport_test_case *test_case,
+                    struct test_migrate_reuseport *skel)
+{
+       int err, saved_len;
+       char buf[16];
+
+       skel->bss->migrated_at_close = 0;
+       skel->bss->migrated_at_close_fastopen = 0;
+       skel->bss->migrated_at_send_synack = 0;
+       skel->bss->migrated_at_recv_ack = 0;
+
+       init_fds(test_case->servers, NR_SERVERS);
+       init_fds(test_case->clients, NR_CLIENTS);
+
+       if (test_case->fastopen) {
+               memset(buf, 0, sizeof(buf));
+
+               err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
+               if (!ASSERT_OK(err, "setup_fastopen - setup"))
+                       return;
+       }
+
+       err = start_servers(test_case, skel);
+       if (!ASSERT_OK(err, "start_servers"))
+               goto close_servers;
+
+       if (test_case->drop_ack) {
+               /* Drop the final ACK of the 3-way handshake so that the
+                * in-flight requests stay in TCP_SYN_RECV or TCP_NEW_SYN_RECV.
+                */
+               err = drop_ack(test_case, skel);
+               if (!ASSERT_OK(err, "drop_ack"))
+                       goto close_servers;
+       }
+
+       /* Tie requests to the first four listeners */
+       err = start_clients(test_case);
+       if (!ASSERT_OK(err, "start_clients"))
+               goto close_clients;
+
+       err = listen(test_case->servers[MIGRATED_TO], QLEN);
+       if (!ASSERT_OK(err, "listen"))
+               goto close_clients;
+
+       err = update_maps(test_case, skel);
+       if (!ASSERT_OK(err, "fill_maps"))
+               goto close_clients;
+
+       /* Migrate the requests in the accept queue only.
+        * TCP_NEW_SYN_RECV requests are not migrated at this point.
+        */
+       err = migrate_dance(test_case);
+       if (!ASSERT_OK(err, "migrate_dance"))
+               goto close_clients;
+
+       if (test_case->expire_synack_timer) {
+               /* Wait for SYN+ACK timers to expire so that
+                * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
+                */
+               sleep(1);
+       }
+
+       if (test_case->link) {
+               /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
+               err = pass_ack(test_case);
+               if (!ASSERT_OK(err, "pass_ack"))
+                       goto close_clients;
+       }
+
+       count_requests(test_case, skel);
+
+close_clients:
+       close_fds(test_case->clients, NR_CLIENTS);
+
+       if (test_case->link) {
+               err = pass_ack(test_case);
+               ASSERT_OK(err, "pass_ack - clean up");
+       }
+
+close_servers:
+       close_fds(test_case->servers, NR_SERVERS);
+
+       if (test_case->fastopen) {
+               err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
+               ASSERT_OK(err, "setup_fastopen - restore");
+       }
+}
+
+void test_migrate_reuseport(void)
+{
+       struct test_migrate_reuseport *skel;
+       int i;
+
+       skel = test_migrate_reuseport__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open_and_load"))
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+               test__start_subtest(test_cases[i].name);
+               run_test(&test_cases[i], skel);
+       }
+
+       test_migrate_reuseport__destroy(skel);
+}
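The sk_reuseport program referenced above (progs.migrate_reuseport with reuseport_map and migrate_map) is outside this section; the following is a hypothetical condensed sketch of its selection logic, omitting the migrated_at_* counters the test reads back:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
            __uint(max_entries, 5);
            __type(key, int);
            __type(value, __u64);
    } reuseport_map SEC(".maps");

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(max_entries, 5);
            __type(key, __u64);     /* cookie of a closing listener */
            __type(value, int);     /* index of the migration target */
    } migrate_map SEC(".maps");

    SEC("sk_reuseport/migrate")
    int migrate_reuseport(struct sk_reuseport_md *reuse_md)
    {
            __u64 cookie;
            int *target;

            if (!reuse_md->migrating_sk)    /* plain SYN: default selection */
                    return SK_PASS;

            cookie = bpf_get_socket_cookie(reuse_md->sk);
            target = bpf_map_lookup_elem(&migrate_map, &cookie);
            if (!target)
                    return SK_DROP;

            /* Steer the migrating request to reuseport_map[*target]; if the
             * call fails (e.g. the listener left the group), SK_PASS lets
             * the kernel fall back to its random pick, as the comments in
             * migrate_dance() anticipate.
             */
            bpf_sk_select_reuseport(reuse_md, &reuseport_map, target, 0);
            return SK_PASS;
    }

    char _license[] SEC("license") = "GPL";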
index e178416..6194b77 100644 (file)
@@ -38,13 +38,13 @@ void test_obj_name(void)
 
                fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
                CHECK((tests[i].success && fd < 0) ||
-                     (!tests[i].success && fd != -1) ||
+                     (!tests[i].success && fd >= 0) ||
                      (!tests[i].success && errno != tests[i].expected_errno),
                      "check-bpf-prog-name",
                      "fd %d(%d) errno %d(%d)\n",
                       fd, tests[i].success, errno, tests[i].expected_errno);
 
-               if (fd != -1)
+               if (fd >= 0)
                        close(fd);
 
                /* test different attr.map_name during BPF_MAP_CREATE */
@@ -59,13 +59,13 @@ void test_obj_name(void)
                memcpy(attr.map_name, tests[i].name, ncopy);
                fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
                CHECK((tests[i].success && fd < 0) ||
-                     (!tests[i].success && fd != -1) ||
+                     (!tests[i].success && fd >= 0) ||
                      (!tests[i].success && errno != tests[i].expected_errno),
                      "check-bpf-map-name",
                      "fd %d(%d) errno %d(%d)\n",
                      fd, tests[i].success, errno, tests[i].expected_errno);
 
-               if (fd != -1)
+               if (fd >= 0)
                        close(fd);
        }
 }
index e35c444..12c4f45 100644 (file)
@@ -74,7 +74,7 @@ static void test_perf_branches_common(int perf_fd,
 
        /* attach perf_event */
        link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
-       if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_perf_event"))
                goto out_destroy_skel;
 
        /* generate some branches on cpu 0 */
@@ -119,7 +119,7 @@ static void test_perf_branches_hw(void)
         * Some setups don't support branch records (virtual machines, !x86),
         * so skip test in this case.
         */
-       if (pfd == -1) {
+       if (pfd < 0) {
                if (errno == ENOENT || errno == EOPNOTSUPP) {
                        printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
                               __func__);
index ca9f089..6490e96 100644 (file)
@@ -80,7 +80,7 @@ void test_perf_buffer(void)
        pb_opts.sample_cb = on_sample;
        pb_opts.ctx = &cpu_seen;
        pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
-       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+       if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto out_close;
 
        CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
index 72c3690..33144c9 100644 (file)
@@ -97,8 +97,7 @@ void test_perf_event_stackmap(void)
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
-                 "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+       if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
                close(pmu_fd);
                goto cleanup;
        }
index 7aecfd9..95bd120 100644 (file)
@@ -15,7 +15,7 @@ void test_probe_user(void)
        static const int zero = 0;
 
        obj = bpf_object__open_file(obj_file, &opts);
-       if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open_file"))
                return;
 
        kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
@@ -33,11 +33,8 @@ void test_probe_user(void)
                goto cleanup;
 
        kprobe_link = bpf_program__attach(kprobe_prog);
-       if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
-                 "err %ld\n", PTR_ERR(kprobe_link))) {
-               kprobe_link = NULL;
+       if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
                goto cleanup;
-       }
 
        memset(&curr, 0, sizeof(curr));
        in->sin_family = AF_INET;
index 131d7f7..89fc98f 100644 (file)
@@ -46,7 +46,7 @@ void test_prog_run_xattr(void)
        tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
 
        err = bpf_prog_test_run_xattr(&tattr);
-       CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
+       CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run",
              "err %d errno %d retval %d\n", err, errno, tattr.retval);
 
        CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
@@ -78,6 +78,6 @@ void test_prog_run_xattr(void)
 cleanup:
        if (skel)
                test_pkt_access__destroy(skel);
-       if (stats_fd != -1)
+       if (stats_fd >= 0)
                close(stats_fd);
 }
index c5fb191..41720a6 100644 (file)
@@ -77,7 +77,7 @@ void test_raw_tp_test_run(void)
        /* invalid cpu ID should fail with ENXIO */
        opts.cpu = 0xffffffff;
        err = bpf_prog_test_run_opts(prog_fd, &opts);
-       CHECK(err != -1 || errno != ENXIO,
+       CHECK(err >= 0 || errno != ENXIO,
              "test_run_opts_fail",
              "should failed with ENXIO\n");
 
@@ -85,7 +85,7 @@ void test_raw_tp_test_run(void)
        opts.cpu = 1;
        opts.flags = 0;
        err = bpf_prog_test_run_opts(prog_fd, &opts);
-       CHECK(err != -1 || errno != EINVAL,
+       CHECK(err >= 0 || errno != EINVAL,
              "test_run_opts_fail",
              "should failed with EINVAL\n");
 
index 563e121..5f9eaa3 100644 (file)
@@ -30,7 +30,7 @@ void test_rdonly_maps(void)
        struct bss bss;
 
        obj = bpf_object__open_file(file, NULL);
-       if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open"))
                return;
 
        err = bpf_object__load(obj);
@@ -58,11 +58,8 @@ void test_rdonly_maps(void)
                        goto cleanup;
 
                link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
-               if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
-                         t->prog_name, PTR_ERR(link))) {
-                       link = NULL;
+               if (!ASSERT_OK_PTR(link, "attach_prog"))
                        goto cleanup;
-               }
 
                /* trigger probe */
                usleep(1);
index ac1ee10..de26881 100644 (file)
@@ -15,7 +15,7 @@ void test_reference_tracking(void)
        int err = 0;
 
        obj = bpf_object__open_file(file, &open_opts);
-       if (CHECK_FAIL(IS_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open_file"))
                return;
 
        if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
index d3c2de2..f623613 100644 (file)
@@ -76,7 +76,7 @@ __resolve_symbol(struct btf *btf, int type_id)
        }
 
        for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
-               if (test_symbols[i].id != -1)
+               if (test_symbols[i].id >= 0)
                        continue;
 
                if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
index cef63e7..167cd8a 100644 (file)
@@ -63,7 +63,7 @@ void test_ringbuf_multi(void)
                goto cleanup;
 
        proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
-       if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+       if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n"))
                goto cleanup;
 
        err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
index 821b414..4efd337 100644 (file)
@@ -78,7 +78,7 @@ static int create_maps(enum bpf_map_type inner_type)
        attr.max_entries = REUSEPORT_ARRAY_SIZE;
 
        reuseport_array = bpf_create_map_xattr(&attr);
-       RET_ERR(reuseport_array == -1, "creating reuseport_array",
+       RET_ERR(reuseport_array < 0, "creating reuseport_array",
                "reuseport_array:%d errno:%d\n", reuseport_array, errno);
 
        /* Creating outer_map */
@@ -89,7 +89,7 @@ static int create_maps(enum bpf_map_type inner_type)
        attr.max_entries = 1;
        attr.inner_map_fd = reuseport_array;
        outer_map = bpf_create_map_xattr(&attr);
-       RET_ERR(outer_map == -1, "creating outer_map",
+       RET_ERR(outer_map < 0, "creating outer_map",
                "outer_map:%d errno:%d\n", outer_map, errno);
 
        return 0;
@@ -102,8 +102,9 @@ static int prepare_bpf_obj(void)
        int err;
 
        obj = bpf_object__open("test_select_reuseport_kern.o");
-       RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
-               "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+       err = libbpf_get_error(obj);
+       RET_ERR(err, "open test_select_reuseport_kern.o",
+               "obj:%p PTR_ERR(obj):%d\n", obj, err);
 
        map = bpf_object__find_map_by_name(obj, "outer_map");
        RET_ERR(!map, "find outer_map", "!map\n");
@@ -116,31 +117,31 @@ static int prepare_bpf_obj(void)
        prog = bpf_program__next(NULL, obj);
        RET_ERR(!prog, "get first bpf_program", "!prog\n");
        select_by_skb_data_prog = bpf_program__fd(prog);
-       RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+       RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
                "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
 
        map = bpf_object__find_map_by_name(obj, "result_map");
        RET_ERR(!map, "find result_map", "!map\n");
        result_map = bpf_map__fd(map);
-       RET_ERR(result_map == -1, "get result_map fd",
+       RET_ERR(result_map < 0, "get result_map fd",
                "result_map:%d\n", result_map);
 
        map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
        RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
        tmp_index_ovr_map = bpf_map__fd(map);
-       RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+       RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
                "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
 
        map = bpf_object__find_map_by_name(obj, "linum_map");
        RET_ERR(!map, "find linum_map", "!map\n");
        linum_map = bpf_map__fd(map);
-       RET_ERR(linum_map == -1, "get linum_map fd",
+       RET_ERR(linum_map < 0, "get linum_map fd",
                "linum_map:%d\n", linum_map);
 
        map = bpf_object__find_map_by_name(obj, "data_check_map");
        RET_ERR(!map, "find data_check_map", "!map\n");
        data_check_map = bpf_map__fd(map);
-       RET_ERR(data_check_map == -1, "get data_check_map fd",
+       RET_ERR(data_check_map < 0, "get data_check_map fd",
                "data_check_map:%d\n", data_check_map);
 
        return 0;
@@ -237,7 +238,7 @@ static long get_linum(void)
        int err;
 
        err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
-       RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+       RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
                err, errno);
 
        return linum;
@@ -254,11 +255,11 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
        addrlen = sizeof(cli_sa);
        err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
                          &addrlen);
-       RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+       RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
               err, errno);
 
        err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
-       RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+       RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
               err, errno);
 
        if (type == SOCK_STREAM) {
@@ -347,7 +348,7 @@ static void check_results(void)
 
        for (i = 0; i < NR_RESULTS; i++) {
                err = bpf_map_lookup_elem(result_map, &i, &results[i]);
-               RET_IF(err == -1, "lookup_elem(result_map)",
+               RET_IF(err < 0, "lookup_elem(result_map)",
                       "i:%u err:%d errno:%d\n", i, err, errno);
        }
 
@@ -524,12 +525,12 @@ static void test_syncookie(int type, sa_family_t family)
         */
        err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
                                  &tmp_index, BPF_ANY);
-       RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+       RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
               "err:%d errno:%d\n", err, errno);
        do_test(type, family, &cmd, PASS);
        err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
                                  &tmp_index);
-       RET_IF(err == -1 || tmp_index != -1,
+       RET_IF(err < 0 || tmp_index >= 0,
               "lookup_elem(tmp_index_ovr_map)",
               "err:%d errno:%d tmp_index:%d\n",
               err, errno, tmp_index);
@@ -569,7 +570,7 @@ static void test_detach_bpf(int type, sa_family_t family)
 
        for (i = 0; i < NR_RESULTS; i++) {
                err = bpf_map_lookup_elem(result_map, &i, &tmp);
-               RET_IF(err == -1, "lookup_elem(result_map)",
+               RET_IF(err < 0, "lookup_elem(result_map)",
                       "i:%u err:%d errno:%d\n", i, err, errno);
                nr_run_before += tmp;
        }
@@ -584,7 +585,7 @@ static void test_detach_bpf(int type, sa_family_t family)
 
        for (i = 0; i < NR_RESULTS; i++) {
                err = bpf_map_lookup_elem(result_map, &i, &tmp);
-               RET_IF(err == -1, "lookup_elem(result_map)",
+               RET_IF(err < 0, "lookup_elem(result_map)",
                       "i:%u err:%d errno:%d\n", i, err, errno);
                nr_run_after += tmp;
        }
@@ -632,24 +633,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
                                         SO_ATTACH_REUSEPORT_EBPF,
                                         &select_by_skb_data_prog,
                                         sizeof(select_by_skb_data_prog));
-                       RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+                       RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
                               "err:%d errno:%d\n", err, errno);
                }
 
                err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
-               RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+               RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
                       i, err, errno);
 
                if (type == SOCK_STREAM) {
                        err = listen(sk_fds[i], 10);
-                       RET_IF(err == -1, "listen()",
+                       RET_IF(err < 0, "listen()",
                               "sk_fds[%d] err:%d errno:%d\n",
                               i, err, errno);
                }
 
                err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
                                          BPF_NOEXIST);
-               RET_IF(err == -1, "update_elem(reuseport_array)",
+               RET_IF(err < 0, "update_elem(reuseport_array)",
                       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
 
                if (i == first) {
@@ -682,7 +683,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
        prepare_sk_fds(type, family, inany);
        err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
                                  BPF_ANY);
-       RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+       RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
               "err:%d errno:%d\n", err, errno);
 
        /* Install reuseport_array to outer_map? */
@@ -691,7 +692,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
 
        err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
                                  BPF_ANY);
-       RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+       RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
               "err:%d errno:%d\n", err, errno);
 }
 
@@ -720,18 +721,18 @@ static void cleanup_per_test(bool no_inner_map)
                return;
 
        err = bpf_map_delete_elem(outer_map, &index_zero);
-       RET_IF(err == -1, "delete_elem(outer_map)",
+       RET_IF(err < 0, "delete_elem(outer_map)",
               "err:%d errno:%d\n", err, errno);
 }
 
 static void cleanup(void)
 {
-       if (outer_map != -1) {
+       if (outer_map >= 0) {
                close(outer_map);
                outer_map = -1;
        }
 
-       if (reuseport_array != -1) {
+       if (reuseport_array >= 0) {
                close(reuseport_array);
                reuseport_array = -1;
        }
index a1eade5..023cc53 100644 (file)
@@ -91,8 +91,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 
                skel->links.send_signal_perf =
                        bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
-               if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
-                         "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+               if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event"))
                        goto disable_pmu;
        }
 
index 45c82db..aee4154 100644 (file)
@@ -480,7 +480,7 @@ static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
        }
 
        link = bpf_program__attach_netns(prog, net_fd);
-       if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+       if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) {
                errno = -PTR_ERR(link);
                log_err("failed to attach program '%s' to netns",
                        bpf_program__name(prog));
index af87118..577d619 100644 (file)
@@ -97,12 +97,12 @@ static void check_result(void)
 
        err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
                                  &egress_linum);
-       CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+       CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
              "err:%d errno:%d\n", err, errno);
 
        err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
                                  &ingress_linum);
-       CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+       CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
              "err:%d errno:%d\n", err, errno);
 
        memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
@@ -355,14 +355,12 @@ void test_sock_fields(void)
 
        egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
                                                 child_cg_fd);
-       if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
-                 PTR_ERR(egress_link)))
+       if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
                goto done;
 
        ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
                                                  child_cg_fd);
-       if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
-                 PTR_ERR(ingress_link)))
+       if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
                goto done;
 
        linum_map_fd = bpf_map__fd(skel->maps.linum_map);
@@ -375,8 +373,8 @@ done:
        bpf_link__destroy(egress_link);
        bpf_link__destroy(ingress_link);
        test_sock_fields__destroy(skel);
-       if (child_cg_fd != -1)
+       if (child_cg_fd >= 0)
                close(child_cg_fd);
-       if (parent_cg_fd != -1)
+       if (parent_cg_fd >= 0)
                close(parent_cg_fd);
 }
index ab77596..1352ec1 100644 (file)
@@ -88,11 +88,11 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
        int s, map, err;
 
        s = connected_socket_v4();
-       if (CHECK_FAIL(s == -1))
+       if (CHECK_FAIL(s < 0))
                return;
 
        map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
-       if (CHECK_FAIL(map == -1)) {
+       if (CHECK_FAIL(map < 0)) {
                perror("bpf_create_map");
                goto out;
        }
@@ -245,7 +245,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.copy, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -304,7 +304,7 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
        }
 
        err = bpf_prog_attach(verdict, map, second, 0);
-       assert(err == -1 && errno == EBUSY);
+       ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
 
        err = bpf_prog_detach2(verdict, map, first);
        if (CHECK_FAIL(err)) {
index 06b86ad..7a0d64f 100644 (file)
@@ -98,7 +98,7 @@ static void run_tests(int family, enum bpf_map_type map_type)
        int map;
 
        map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
-       if (CHECK_FAIL(map == -1)) {
+       if (CHECK_FAIL(map < 0)) {
                perror("bpf_map_create");
                return;
        }
index 648d9ae..0f066b8 100644 (file)
 #define xbpf_map_delete_elem(fd, key)                                          \
        ({                                                                     \
                int __ret = bpf_map_delete_elem((fd), (key));                  \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("map_delete");                              \
                __ret;                                                         \
        })
 #define xbpf_map_lookup_elem(fd, key, val)                                     \
        ({                                                                     \
                int __ret = bpf_map_lookup_elem((fd), (key), (val));           \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("map_lookup");                              \
                __ret;                                                         \
        })
 #define xbpf_map_update_elem(fd, key, val, flags)                              \
        ({                                                                     \
                int __ret = bpf_map_update_elem((fd), (key), (val), (flags));  \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("map_update");                              \
                __ret;                                                         \
        })
        ({                                                                     \
                int __ret =                                                    \
                        bpf_prog_attach((prog), (target), (type), (flags));    \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("prog_attach(" #type ")");                  \
                __ret;                                                         \
        })
 #define xbpf_prog_detach2(prog, target, type)                                  \
        ({                                                                     \
                int __ret = bpf_prog_detach2((prog), (target), (type));        \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("prog_detach2(" #type ")");                 \
                __ret;                                                         \
        })
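
The == -1 to < 0 conversions in these selftests track libbpf's new error-reporting convention: in strict mode, low-level APIs may return the negative error code directly (for example -EBUSY) instead of -1 with the code only in errno, and err < 0 catches both. A minimal sketch of the resulting pattern, assuming a map fd obtained elsewhere (map_update_checked() is a hypothetical helper, not part of this series):

#include <errno.h>
#include <bpf/bpf.h>

/* Works under both conventions: err < 0 matches -1 as well as a
 * direct -Exxx return, and errno is set either way.
 */
static int map_update_checked(int map_fd, const void *key, const void *val)
{
	int err = bpf_map_update_elem(map_fd, key, val, BPF_ANY);

	return err < 0 ? -errno : 0;
}
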
index 11a769e..0a91d8d 100644 (file)
@@ -62,8 +62,7 @@ retry:
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
-                 "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+       if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
                close(pmu_fd);
                goto cleanup;
        }
index 37269d2..04b476b 100644 (file)
@@ -21,7 +21,7 @@ void test_stacktrace_map(void)
                goto close_prog;
 
        link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
-       if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_tp"))
                goto close_prog;
 
        /* find map fds */
index 404a549..4fd30bb 100644 (file)
@@ -21,7 +21,7 @@ void test_stacktrace_map_raw_tp(void)
                goto close_prog;
 
        link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
-       if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                goto close_prog;
 
        /* find map fds */
@@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void)
                goto close_prog;
 
 close_prog:
-       if (!IS_ERR_OR_NULL(link))
-               bpf_link__destroy(link);
+       bpf_link__destroy(link);
        bpf_object__close(obj);
 }
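
The ASSERT_OK_PTR() conversions rely on the companion libbpf change where pointer-returning APIs yield NULL on failure, with the error code in errno, rather than an ERR_PTR()-encoded value; bpf_link__destroy(NULL) is likewise a safe no-op, which is why the IS_ERR_OR_NULL() guard above can go away. A minimal sketch under those semantics (attach_tp_checked() is a hypothetical helper):

#include <errno.h>
#include <stdio.h>
#include <bpf/libbpf.h>

/* Hypothetical helper around the NULL-on-error convention. */
static struct bpf_link *attach_tp_checked(struct bpf_program *prog)
{
	struct bpf_link *link;

	link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
	if (!link)	/* no IS_ERR() needed; error code is in errno */
		fprintf(stderr, "attach failed: %d\n", -errno);
	return link;	/* bpf_link__destroy(NULL) is a no-op */
}
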
index 08d19ca..1fa7720 100644 (file)
@@ -353,8 +353,7 @@ static void fastopen_estab(void)
                return;
 
        link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, true)) {
@@ -398,8 +397,7 @@ static void syncookie_estab(void)
                return;
 
        link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, false)) {
@@ -431,8 +429,7 @@ static void fin(void)
                return;
 
        link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, false)) {
@@ -471,8 +468,7 @@ static void __simple_estab(bool exprm)
                return;
 
        link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, false)) {
@@ -509,8 +505,7 @@ static void misc(void)
                return;
 
        link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, false)) {
index 9966685..123c68c 100644 (file)
@@ -73,7 +73,7 @@ void test_test_overhead(void)
                return;
 
        obj = bpf_object__open_file("./test_overhead.o", NULL);
-       if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open_file"))
                return;
 
        kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
@@ -108,7 +108,7 @@ void test_test_overhead(void)
        /* attach kprobe */
        link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
                                          kprobe_func);
-       if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_kprobe"))
                goto cleanup;
        test_run("kprobe");
        bpf_link__destroy(link);
@@ -116,28 +116,28 @@ void test_test_overhead(void)
        /* attach kretprobe */
        link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
                                          kprobe_func);
-       if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_kretprobe"))
                goto cleanup;
        test_run("kretprobe");
        bpf_link__destroy(link);
 
        /* attach raw_tp */
        link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
-       if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                goto cleanup;
        test_run("raw_tp");
        bpf_link__destroy(link);
 
        /* attach fentry */
        link = bpf_program__attach_trace(fentry_prog);
-       if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_fentry"))
                goto cleanup;
        test_run("fentry");
        bpf_link__destroy(link);
 
        /* attach fexit */
        link = bpf_program__attach_trace(fexit_prog);
-       if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_fexit"))
                goto cleanup;
        test_run("fexit");
        bpf_link__destroy(link);
index f3022d9..d7f5a93 100644 (file)
@@ -55,7 +55,7 @@ void test_trampoline_count(void)
        /* attach 'allowed' trampoline programs */
        for (i = 0; i < MAX_TRAMP_PROGS; i++) {
                obj = bpf_object__open_file(object, NULL);
-               if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+               if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
                        obj = NULL;
                        goto cleanup;
                }
@@ -68,14 +68,14 @@ void test_trampoline_count(void)
 
                if (rand() % 2) {
                        link = load(inst[i].obj, fentry_name);
-                       if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+                       if (!ASSERT_OK_PTR(link, "attach_prog")) {
                                link = NULL;
                                goto cleanup;
                        }
                        inst[i].link_fentry = link;
                } else {
                        link = load(inst[i].obj, fexit_name);
-                       if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+                       if (!ASSERT_OK_PTR(link, "attach_prog")) {
                                link = NULL;
                                goto cleanup;
                        }
@@ -85,7 +85,7 @@ void test_trampoline_count(void)
 
        /* and try 1 extra.. */
        obj = bpf_object__open_file(object, NULL);
-       if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+       if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
                obj = NULL;
                goto cleanup;
        }
@@ -96,13 +96,15 @@ void test_trampoline_count(void)
 
        /* ..that needs to fail */
        link = load(obj, fentry_name);
-       if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
+       err = libbpf_get_error(link);
+       if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) {
                bpf_link__destroy(link);
                goto cleanup_extra;
        }
 
        /* with E2BIG error */
-       CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+       ASSERT_EQ(err, -E2BIG, "proper error check");
+       ASSERT_EQ(link, NULL, "ptr_is_null");
 
        /* and finally execute the probe */
        if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
index 2aba09d..56c9d6b 100644 (file)
@@ -22,11 +22,10 @@ void test_udp_limit(void)
                goto close_cgroup_fd;
 
        skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+       if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock"))
+               goto close_skeleton;
        skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
-       if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
-                 "cg-attach", "sock %ld sock_release %ld",
-                 PTR_ERR(skel->links.sock),
-                 PTR_ERR(skel->links.sock_release)))
+       if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release"))
                goto close_skeleton;
 
        /* BPF program enforces a single UDP socket per cgroup,
index 2c6c570..3bd5904 100644 (file)
@@ -90,7 +90,7 @@ void test_xdp_bpf2bpf(void)
        pb_opts.ctx = &passed;
        pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
                              1, &pb_opts);
-       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+       if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto out;
 
        /* Run test program */
index 6f81499..46eed0a 100644 (file)
@@ -51,7 +51,7 @@ void test_xdp_link(void)
 
        /* BPF link is not allowed to replace prog attachment */
        link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
-       if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+       if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
                bpf_link__destroy(link);
                /* best-effort detach prog */
                opts.old_fd = prog_fd1;
@@ -67,7 +67,7 @@ void test_xdp_link(void)
 
        /* now BPF link should attach successfully */
        link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
-       if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_attach"))
                goto cleanup;
        skel1->links.xdp_handler = link;
 
@@ -95,7 +95,7 @@ void test_xdp_link(void)
 
        /* BPF link is not allowed to replace another BPF link */
        link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
-       if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+       if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
                bpf_link__destroy(link);
                goto cleanup;
        }
@@ -105,7 +105,7 @@ void test_xdp_link(void)
 
        /* new link attach should succeed */
        link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
-       if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_attach"))
                goto cleanup;
        skel2->links.xdp_handler = link;
 
index 6dfce3f..0aa3cd3 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index b83b5d2..6c39e86 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index d58d9f1..784a610 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 95989f4..a28e51e 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index b7f32c1..c86b93f 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index a1ddc36..bca8b88 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020, Oracle and/or its affiliates. */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
 #include <errno.h>
index b2f7c7c..6e7b400 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 43c36f5..f2b8167 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 11d1aa3..4ea6a37 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 54380c5..2e4775c 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
index b4fbddf..943f7bb 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
index f258583..cf0c485 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
index 65f93bb..5031e21 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644 (file)
index 0000000..3a193f4
--- /dev/null
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 set_pid = 0;
+__u64 set_key = 0;
+__u64 set_value = 0;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 2);
+       __type(key, __u64);
+       __type(value, __u64);
+} hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+       if (set_pid == bpf_get_current_pid_tgid() >> 32)
+               bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
new file mode 100644 (file)
index 0000000..27df571
--- /dev/null
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ *   1. If reuse_md->migrating_sk is NULL (SYN packet),
+ *        return SK_PASS without selecting a listener.
+ *   2. If reuse_md->migrating_sk is not NULL (socket migration),
+ *        select a listener (reuseport_map[migrate_map[cookie]])
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+       __uint(max_entries, 256);
+       __type(key, int);
+       __type(value, __u64);
+} reuseport_map SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 256);
+       __type(key, __u64);
+       __type(value, int);
+} migrate_map SEC(".maps");
+
+int migrated_at_close = 0;
+int migrated_at_close_fastopen = 0;
+int migrated_at_send_synack = 0;
+int migrated_at_recv_ack = 0;
+__be16 server_port;
+
+SEC("xdp")
+int drop_ack(struct xdp_md *xdp)
+{
+       void *data_end = (void *)(long)xdp->data_end;
+       void *data = (void *)(long)xdp->data;
+       struct ethhdr *eth = data;
+       struct tcphdr *tcp = NULL;
+
+       if (eth + 1 > data_end)
+               goto pass;
+
+       switch (bpf_ntohs(eth->h_proto)) {
+       case ETH_P_IP: {
+               struct iphdr *ip = (struct iphdr *)(eth + 1);
+
+               if (ip + 1 > data_end)
+                       goto pass;
+
+               if (ip->protocol != IPPROTO_TCP)
+                       goto pass;
+
+               tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4);
+               break;
+       }
+       case ETH_P_IPV6: {
+               struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1);
+
+               if (ipv6 + 1 > data_end)
+                       goto pass;
+
+               if (ipv6->nexthdr != IPPROTO_TCP)
+                       goto pass;
+
+               tcp = (struct tcphdr *)(ipv6 + 1);
+               break;
+       }
+       default:
+               goto pass;
+       }
+
+       if (tcp + 1 > data_end)
+               goto pass;
+
+       if (tcp->dest != server_port)
+               goto pass;
+
+       if (!tcp->syn && tcp->ack)
+               return XDP_DROP;
+
+pass:
+       return XDP_PASS;
+}
+
+SEC("sk_reuseport/migrate")
+int migrate_reuseport(struct sk_reuseport_md *reuse_md)
+{
+       int *key, flags = 0, state, err;
+       __u64 cookie;
+
+       if (!reuse_md->migrating_sk)
+               return SK_PASS;
+
+       state = reuse_md->migrating_sk->state;
+       cookie = bpf_get_socket_cookie(reuse_md->sk);
+
+       key = bpf_map_lookup_elem(&migrate_map, &cookie);
+       if (!key)
+               return SK_DROP;
+
+       err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags);
+       if (err)
+               return SK_PASS;
+
+       switch (state) {
+       case BPF_TCP_ESTABLISHED:
+               __sync_fetch_and_add(&migrated_at_close, 1);
+               break;
+       case BPF_TCP_SYN_RECV:
+               __sync_fetch_and_add(&migrated_at_close_fastopen, 1);
+               break;
+       case BPF_TCP_NEW_SYN_RECV:
+               if (!reuse_md->len)
+                       __sync_fetch_and_add(&migrated_at_send_synack, 1);
+               else
+                       __sync_fetch_and_add(&migrated_at_recv_ack, 1);
+               break;
+       }
+
+       return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
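
To exercise this, user space attaches the sk_reuseport program to one socket of the reuseport group via SO_ATTACH_REUSEPORT_EBPF and enables the net.ipv4.tcp_migrate_req sysctl added by this series, so that child sockets actually get migrated. A minimal sketch of the attach step (attach_migrate_prog() is a hypothetical helper; lsk is a listening socket already bound with SO_REUSEPORT):

#include <errno.h>
#include <sys/socket.h>

#ifndef SO_ATTACH_REUSEPORT_EBPF
#define SO_ATTACH_REUSEPORT_EBPF 52	/* from asm-generic/socket.h */
#endif

/* Hypothetical helper: attach the BPF prog fd to the reuseport group. */
static int attach_migrate_prog(int lsk, int prog_fd)
{
	if (setsockopt(lsk, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
		       &prog_fd, sizeof(prog_fd)) < 0)
		return -errno;
	return 0;
}
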
index e35129b..e2ad261 100644 (file)
@@ -3,7 +3,6 @@
 
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 __u32 pid = 0;
 
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
new file mode 100644 (file)
index 0000000..880debc
--- /dev/null
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <string.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* One map uses a devmap, the other a devmap_hash, to test both types */
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+       __uint(max_entries, 1024);
+} map_all SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 128);
+} map_egress SEC(".maps");
+
+/* map to store the egress interfaces' MAC addresses */
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, __u32);
+       __type(value, __be64);
+       __uint(max_entries, 128);
+} mac_map SEC(".maps");
+
+SEC("xdp_redirect_map_multi")
+int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       int if_index = ctx->ingress_ifindex;
+       struct ethhdr *eth = data;
+       __u16 h_proto;
+       __u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       h_proto = eth->h_proto;
+
+       /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
+       if (h_proto == bpf_htons(ETH_P_IP))
+               return bpf_redirect_map(&map_all, 0,
+                                       BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+       /* Using IPv6 to test the no-flags case */
+       else if (h_proto == bpf_htons(ETH_P_IPV6))
+               return bpf_redirect_map(&map_all, if_index, 0);
+       /* All others for BPF_F_BROADCAST testing */
+       else
+               return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+}
+
+/* The following 2 progs are for 2nd devmap prog testing */
+SEC("xdp_redirect_map_ingress")
+int xdp_redirect_map_all_prog(struct xdp_md *ctx)
+{
+       return bpf_redirect_map(&map_egress, 0,
+                               BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       __u32 key = ctx->egress_ifindex;
+       struct ethhdr *eth = data;
+       __u64 nh_off;
+       __be64 *mac;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       mac = bpf_map_lookup_elem(&mac_map, &key);
+       if (mac)
+               __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
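
With BPF_F_BROADCAST the ifindex/key argument of bpf_redirect_map() is ignored and the frame is forwarded to every interface in the map, minus the ingress device when BPF_F_EXCLUDE_INGRESS is also set, so user space only has to populate map_all with the egress ifindexes. A minimal sketch, assuming map_all_fd comes from the loaded object (add_egress_dev() is a hypothetical helper):

#include <errno.h>
#include <bpf/bpf.h>

/* Hypothetical helper: add one egress ifindex to the devmap. */
static int add_egress_dev(int map_all_fd, int slot, int ifindex)
{
	int err = bpf_map_update_elem(map_all_fd, &slot, &ifindex, BPF_ANY);

	return err < 0 ? -errno : 0;
}
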
index 7eb940a..ed12111 100755 (executable)
@@ -1,5 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+set -e
 
 # Assume script is located under tools/testing/selftests/bpf/. We want to start
 # build attempts from the top of kernel repository.
index 6a5349f..7e9049f 100644 (file)
@@ -231,6 +231,14 @@ static void test_lru_sanity0(int map_type, int map_flags)
        assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
               errno == ENOENT);
 
+       /* lookup elem key=1 and delete it, then check it doesn't exist */
+       key = 1;
+       assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value));
+       assert(value[0] == 1234);
+
+       /* remove the same element from the expected map */
+       assert(!bpf_map_delete_elem(expected_map_fd, &key));
+
        assert(map_equal(lru_map_fd, expected_map_fd));
 
        close(expected_map_fd);
index 51adc42..30cbf5d 100644 (file)
@@ -53,23 +53,30 @@ static void test_hashmap(unsigned int task, void *data)
 
        value = 0;
        /* BPF_NOEXIST means add new element if it doesn't exist. */
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               /* key=1 already exists. */
               errno == EEXIST);
 
        /* -1 is an invalid flag. */
-       assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 &&
               errno == EINVAL);
 
        /* Check that key=1 can be found. */
        assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
 
        key = 2;
+       value = 1234;
+       /* Insert key=2 element. */
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+       /* Check that key=2 matches the value and delete it */
+       assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
        /* Check that key=2 is not found. */
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
 
        /* BPF_EXIST means update existing element. */
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
               /* key=2 is not there. */
               errno == ENOENT);
 
@@ -80,7 +87,7 @@ static void test_hashmap(unsigned int task, void *data)
         * inserted due to max_entries limit.
         */
        key = 0;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == E2BIG);
 
        /* Update existing element, though the map is full. */
@@ -89,12 +96,12 @@ static void test_hashmap(unsigned int task, void *data)
        key = 2;
        assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
        key = 3;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == E2BIG);
 
        /* Check that key = 0 doesn't exist. */
        key = 0;
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
        /* Iterate over two elements. */
        assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -104,7 +111,7 @@ static void test_hashmap(unsigned int task, void *data)
        assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
               (next_key == 1 || next_key == 2) &&
               (next_key != first_key));
-       assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
               errno == ENOENT);
 
        /* Delete both elements. */
@@ -112,13 +119,13 @@ static void test_hashmap(unsigned int task, void *data)
        assert(bpf_map_delete_elem(fd, &key) == 0);
        key = 2;
        assert(bpf_map_delete_elem(fd, &key) == 0);
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
        key = 0;
        /* Check that map is empty. */
-       assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
               errno == ENOENT);
-       assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
               errno == ENOENT);
 
        close(fd);
@@ -166,15 +173,25 @@ static void test_hashmap_percpu(unsigned int task, void *data)
        /* Insert key=1 element. */
        assert(!(expected_key_mask & key));
        assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+
+       /* Lookup and delete elem key=1 and check value. */
+       assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 &&
+              bpf_percpu(value, 0) == 100);
+
+       for (i = 0; i < nr_cpus; i++)
+               bpf_percpu(value, i) = i + 100;
+
+       /* Insert key=1 element which should not exist. */
+       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
        expected_key_mask |= key;
 
        /* BPF_NOEXIST means add new element if it doesn't exist. */
-       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
               /* key=1 already exists. */
               errno == EEXIST);
 
        /* -1 is an invalid flag. */
-       assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, value, -1) < 0 &&
               errno == EINVAL);
 
        /* Check that key=1 can be found. Value could be 0 if the lookup
@@ -186,10 +203,10 @@ static void test_hashmap_percpu(unsigned int task, void *data)
 
        key = 2;
        /* Check that key=2 is not found. */
-       assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT);
 
        /* BPF_EXIST means update existing element. */
-       assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 &&
               /* key=2 is not there. */
               errno == ENOENT);
 
@@ -202,11 +219,11 @@ static void test_hashmap_percpu(unsigned int task, void *data)
         * inserted due to max_entries limit.
         */
        key = 0;
-       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
               errno == E2BIG);
 
        /* Check that key = 0 doesn't exist. */
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
        /* Iterate over two elements. */
        assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -237,13 +254,13 @@ static void test_hashmap_percpu(unsigned int task, void *data)
        assert(bpf_map_delete_elem(fd, &key) == 0);
        key = 2;
        assert(bpf_map_delete_elem(fd, &key) == 0);
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
        key = 0;
        /* Check that map is empty. */
-       assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
               errno == ENOENT);
-       assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
               errno == ENOENT);
 
        close(fd);
@@ -360,7 +377,7 @@ static void test_arraymap(unsigned int task, void *data)
        assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
        value = 0;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == EEXIST);
 
        /* Check that key=1 can be found. */
@@ -374,11 +391,11 @@ static void test_arraymap(unsigned int task, void *data)
         * due to max_entries limit.
         */
        key = 2;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
               errno == E2BIG);
 
        /* Check that key = 2 doesn't exist. */
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
 
        /* Iterate over two elements. */
        assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -387,12 +404,12 @@ static void test_arraymap(unsigned int task, void *data)
               next_key == 0);
        assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
               next_key == 1);
-       assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
               errno == ENOENT);
 
        /* Delete shouldn't succeed. */
        key = 1;
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
 
        close(fd);
 }
@@ -418,7 +435,7 @@ static void test_arraymap_percpu(unsigned int task, void *data)
        assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
 
        bpf_percpu(values, 0) = 0;
-       assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 &&
               errno == EEXIST);
 
        /* Check that key=1 can be found. */
@@ -433,11 +450,11 @@ static void test_arraymap_percpu(unsigned int task, void *data)
 
        /* Check that key=2 cannot be inserted due to max_entries limit. */
        key = 2;
-       assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 &&
               errno == E2BIG);
 
        /* Check that key = 2 doesn't exist. */
-       assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT);
 
        /* Iterate over two elements. */
        assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -446,12 +463,12 @@ static void test_arraymap_percpu(unsigned int task, void *data)
               next_key == 0);
        assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
               next_key == 1);
-       assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
               errno == ENOENT);
 
        /* Delete shouldn't succeed. */
        key = 1;
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
 
        close(fd);
 }
@@ -555,7 +572,7 @@ static void test_queuemap(unsigned int task, void *data)
                assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
 
        /* Check that element cannot be pushed due to max_entries limit */
-       assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+       assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
               errno == E2BIG);
 
        /* Peek element */
@@ -571,12 +588,12 @@ static void test_queuemap(unsigned int task, void *data)
                       val == vals[i]);
 
        /* Check that there are no elements left */
-       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
               errno == ENOENT);
 
        /* Check that unsupported functions set errno to EINVAL */
-       assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
-       assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+       assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+       assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
 
        close(fd);
 }
@@ -613,7 +630,7 @@ static void test_stackmap(unsigned int task, void *data)
                assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
 
        /* Check that element cannot be pushed due to max_entries limit */
-       assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+       assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
               errno == E2BIG);
 
        /* Peek element */
@@ -629,12 +646,12 @@ static void test_stackmap(unsigned int task, void *data)
                       val == vals[i]);
 
        /* Check that there are no elements left */
-       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
               errno == ENOENT);
 
        /* Check that unsupported functions set errno to EINVAL */
-       assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
-       assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+       assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+       assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
 
        close(fd);
 }
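
For BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK the key argument is always NULL: bpf_map_update_elem() pushes, bpf_map_lookup_elem() peeks, and bpf_map_lookup_and_delete_elem() pops, which is what the two hunks above assert. A minimal sketch (push_pop() is a hypothetical helper):

#include <errno.h>
#include <linux/types.h>
#include <bpf/bpf.h>

/* Hypothetical helper: push one value, then pop it back. */
static int push_pop(int fd, __u32 in, __u32 *out)
{
	if (bpf_map_update_elem(fd, NULL, &in, 0) < 0)		/* push */
		return -errno;
	if (bpf_map_lookup_and_delete_elem(fd, NULL, out) < 0)	/* pop */
		return -errno;
	return 0;
}
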
@@ -835,7 +852,7 @@ static void test_sockmap(unsigned int tasks, void *data)
        }
 
        bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
-       if (IS_ERR(bpf_map_rx)) {
+       if (!bpf_map_rx) {
                printf("Failed to load map rx from verdict prog\n");
                goto out_sockmap;
        }
@@ -847,7 +864,7 @@ static void test_sockmap(unsigned int tasks, void *data)
        }
 
        bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
-       if (IS_ERR(bpf_map_tx)) {
+       if (!bpf_map_tx) {
                printf("Failed to load map tx from verdict prog\n");
                goto out_sockmap;
        }
@@ -859,7 +876,7 @@ static void test_sockmap(unsigned int tasks, void *data)
        }
 
        bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
-       if (IS_ERR(bpf_map_msg)) {
+       if (!bpf_map_msg) {
                printf("Failed to load map msg from msg_verdict prog\n");
                goto out_sockmap;
        }
@@ -871,7 +888,7 @@ static void test_sockmap(unsigned int tasks, void *data)
        }
 
        bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
-       if (IS_ERR(bpf_map_break)) {
+       if (!bpf_map_break) {
                printf("Failed to load map tx from verdict prog\n");
                goto out_sockmap;
        }
@@ -1153,7 +1170,7 @@ static void test_map_in_map(void)
        }
 
        map = bpf_object__find_map_by_name(obj, "mim_array");
-       if (IS_ERR(map)) {
+       if (!map) {
                printf("Failed to load array of maps from test prog\n");
                goto out_map_in_map;
        }
@@ -1164,7 +1181,7 @@ static void test_map_in_map(void)
        }
 
        map = bpf_object__find_map_by_name(obj, "mim_hash");
-       if (IS_ERR(map)) {
+       if (!map) {
                printf("Failed to load hash of maps from test prog\n");
                goto out_map_in_map;
        }
@@ -1177,7 +1194,7 @@ static void test_map_in_map(void)
        bpf_object__load(obj);
 
        map = bpf_object__find_map_by_name(obj, "mim_array");
-       if (IS_ERR(map)) {
+       if (!map) {
                printf("Failed to load array of maps from test prog\n");
                goto out_map_in_map;
        }
@@ -1194,7 +1211,7 @@ static void test_map_in_map(void)
        }
 
        map = bpf_object__find_map_by_name(obj, "mim_hash");
-       if (IS_ERR(map)) {
+       if (!map) {
                printf("Failed to load hash of maps from test prog\n");
                goto out_map_in_map;
        }
@@ -1246,7 +1263,7 @@ static void test_map_large(void)
        }
 
        key.c = -1;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == E2BIG);
 
        /* Iterate through all elements. */
@@ -1254,12 +1271,12 @@ static void test_map_large(void)
        key.c = -1;
        for (i = 0; i < MAP_SIZE; i++)
                assert(bpf_map_get_next_key(fd, &key, &key) == 0);
-       assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
 
        key.c = 0;
        assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
        key.a = 1;
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
 
        close(fd);
 }
@@ -1391,7 +1408,7 @@ static void test_map_parallel(void)
        run_parallel(TASKS, test_update_delete, data);
 
        /* Check that key=0 is already there. */
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == EEXIST);
 
        /* Check that all elements were inserted. */
@@ -1399,7 +1416,7 @@ static void test_map_parallel(void)
        key = -1;
        for (i = 0; i < MAP_SIZE; i++)
                assert(bpf_map_get_next_key(fd, &key, &key) == 0);
-       assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
 
        /* Another check for all elements */
        for (i = 0; i < MAP_SIZE; i++) {
@@ -1415,8 +1432,8 @@ static void test_map_parallel(void)
 
        /* Nothing should be left. */
        key = -1;
-       assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
-       assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
 }
 
 static void test_map_rdonly(void)
@@ -1434,12 +1451,12 @@ static void test_map_rdonly(void)
        key = 1;
        value = 1234;
        /* Try to insert key=1 element. */
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 &&
               errno == EPERM);
 
        /* Check that key=1 is not found. */
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
-       assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT);
 
        close(fd);
 }
@@ -1462,8 +1479,8 @@ static void test_map_wronly_hash(void)
        assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
        /* Check that reading elements and keys from the map is not allowed. */
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
-       assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM);
+       assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM);
 
        close(fd);
 }
@@ -1490,10 +1507,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
        assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
 
        /* Peek element should fail */
-       assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+       assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM);
 
        /* Pop element should fail */
-       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 &&
               errno == EPERM);
 
        close(fd);
@@ -1547,7 +1564,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
                        value = &fd32;
                }
                err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
-               CHECK(err != -1 || errno != EINVAL,
+               CHECK(err >= 0 || errno != EINVAL,
                      "reuseport array update unbound sk",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1576,7 +1593,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
                         */
                        err = bpf_map_update_elem(map_fd, &index0, value,
                                                  BPF_ANY);
-                       CHECK(err != -1 || errno != EINVAL,
+                       CHECK(err >= 0 || errno != EINVAL,
                              "reuseport array update non-listening sk",
                              "sock_type:%d err:%d errno:%d\n",
                              type, err, errno);
@@ -1606,31 +1623,31 @@ static void test_reuseport_array(void)
 
        map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
                                sizeof(__u32), sizeof(__u64), array_size, 0);
-       CHECK(map_fd == -1, "reuseport array create",
+       CHECK(map_fd < 0, "reuseport array create",
              "map_fd:%d, errno:%d\n", map_fd, errno);
 
        /* Test lookup/update/delete with invalid index */
        err = bpf_map_delete_elem(map_fd, &bad_index);
-       CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+       CHECK(err >= 0 || errno != E2BIG, "reuseport array del >=max_entries",
              "err:%d errno:%d\n", err, errno);
 
        err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
-       CHECK(err != -1 || errno != E2BIG,
+       CHECK(err >= 0 || errno != E2BIG,
              "reuseport array update >=max_entries",
              "err:%d errno:%d\n", err, errno);
 
        err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
-       CHECK(err != -1 || errno != ENOENT,
+       CHECK(err >= 0 || errno != ENOENT,
              "reuseport array update >=max_entries",
              "err:%d errno:%d\n", err, errno);
 
        /* Test lookup/delete non existence elem */
        err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-       CHECK(err != -1 || errno != ENOENT,
+       CHECK(err >= 0 || errno != ENOENT,
              "reuseport array lookup not-exist elem",
              "err:%d errno:%d\n", err, errno);
        err = bpf_map_delete_elem(map_fd, &index3);
-       CHECK(err != -1 || errno != ENOENT,
+       CHECK(err >= 0 || errno != ENOENT,
              "reuseport array del not-exist elem",
              "err:%d errno:%d\n", err, errno);
 
@@ -1644,7 +1661,7 @@ static void test_reuseport_array(void)
                /* BPF_EXIST failure case */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_EXIST);
-               CHECK(err != -1 || errno != ENOENT,
+               CHECK(err >= 0 || errno != ENOENT,
                      "reuseport array update empty elem BPF_EXIST",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1653,7 +1670,7 @@ static void test_reuseport_array(void)
                /* BPF_NOEXIST success case */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_NOEXIST);
-               CHECK(err == -1,
+               CHECK(err < 0,
                      "reuseport array update empty elem BPF_NOEXIST",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1662,7 +1679,7 @@ static void test_reuseport_array(void)
                /* BPF_EXIST success case. */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_EXIST);
-               CHECK(err == -1,
+               CHECK(err < 0,
                      "reuseport array update same elem BPF_EXIST",
                      "sock_type:%d err:%d errno:%d\n", type, err, errno);
                fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
@@ -1670,7 +1687,7 @@ static void test_reuseport_array(void)
                /* BPF_NOEXIST failure case */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_NOEXIST);
-               CHECK(err != -1 || errno != EEXIST,
+               CHECK(err >= 0 || errno != EEXIST,
                      "reuseport array update non-empty elem BPF_NOEXIST",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1679,7 +1696,7 @@ static void test_reuseport_array(void)
                /* BPF_ANY case (always succeed) */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_ANY);
-               CHECK(err == -1,
+               CHECK(err < 0,
                      "reuseport array update same sk with BPF_ANY",
                      "sock_type:%d err:%d errno:%d\n", type, err, errno);
 
@@ -1688,32 +1705,32 @@ static void test_reuseport_array(void)
 
                /* The same sk cannot be added to reuseport_array twice */
                err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
-               CHECK(err != -1 || errno != EBUSY,
+               CHECK(err >= 0 || errno != EBUSY,
                      "reuseport array update same sk with same index",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
 
                err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
-               CHECK(err != -1 || errno != EBUSY,
+               CHECK(err >= 0 || errno != EBUSY,
                      "reuseport array update same sk with different index",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
 
                /* Test delete elem */
                err = bpf_map_delete_elem(map_fd, &index3);
-               CHECK(err == -1, "reuseport array delete sk",
+               CHECK(err < 0, "reuseport array delete sk",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
 
                /* Add it back with BPF_NOEXIST */
                err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
-               CHECK(err == -1,
+               CHECK(err < 0,
                      "reuseport array re-add with BPF_NOEXIST after del",
                      "sock_type:%d err:%d errno:%d\n", type, err, errno);
 
                /* Test cookie */
                err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-               CHECK(err == -1 || sk_cookie != map_cookie,
+               CHECK(err < 0 || sk_cookie != map_cookie,
                      "reuseport array lookup re-added sk",
                      "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
                      type, err, errno, sk_cookie, map_cookie);
@@ -1722,7 +1739,7 @@ static void test_reuseport_array(void)
                for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
                        close(grpa_fds64[f]);
                err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-               CHECK(err != -1 || errno != ENOENT,
+               CHECK(err >= 0 || errno != ENOENT,
                      "reuseport array lookup after close()",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1733,7 +1750,7 @@ static void test_reuseport_array(void)
        CHECK(fd64 == -1, "socket(SOCK_RAW)", "fd:%d errno:%d\n",
              fd64, errno);
        err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
-       CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+       CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
              "err:%d errno:%d\n", err, errno);
        close(fd64);
 
@@ -1743,16 +1760,16 @@ static void test_reuseport_array(void)
        /* Test 32 bit fd */
        map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
                                sizeof(__u32), sizeof(__u32), array_size, 0);
-       CHECK(map_fd == -1, "reuseport array create",
+       CHECK(map_fd < 0, "reuseport array create",
              "map_fd:%d, errno:%d\n", map_fd, errno);
        prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
                              &sk_cookie, 1);
        fd = fd64;
        err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
-       CHECK(err == -1, "reuseport array update 32 bit fd",
+       CHECK(err < 0, "reuseport array update 32 bit fd",
              "err:%d errno:%d\n", err, errno);
        err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-       CHECK(err != -1 || errno != ENOSPC,
+       CHECK(err >= 0 || errno != ENOSPC,
              "reuseport array lookup 32 bit fd",
              "err:%d errno:%d\n", err, errno);
        close(fd);
@@ -1798,6 +1815,8 @@ int main(void)
 {
        srand(time(NULL));
 
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        map_flags = 0;
        run_all_tests();
 
index 6396932..6f10310 100644 (file)
@@ -737,6 +737,9 @@ int main(int argc, char **argv)
        if (err)
                return err;
 
+       /* Use libbpf 1.0 API mode */
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        libbpf_set_print(libbpf_print_fn);
 
        srand(time(NULL));
index dda52cb..8ef7f33 100644 (file)
@@ -249,16 +249,17 @@ extern int test__join_cgroup(const char *path);
 #define ASSERT_OK_PTR(ptr, name) ({                                    \
        static int duration = 0;                                        \
        const void *___res = (ptr);                                     \
-       bool ___ok = !IS_ERR_OR_NULL(___res);                           \
-       CHECK(!___ok, (name),                                           \
-             "unexpected error: %ld\n", PTR_ERR(___res));              \
+       int ___err = libbpf_get_error(___res);                          \
+       bool ___ok = ___err == 0;                                       \
+       CHECK(!___ok, (name), "unexpected error: %d\n", ___err);        \
        ___ok;                                                          \
 })
 
 #define ASSERT_ERR_PTR(ptr, name) ({                                   \
        static int duration = 0;                                        \
        const void *___res = (ptr);                                     \
-       bool ___ok = IS_ERR(___res);                                    \
+       int ___err = libbpf_get_error(___res);                          \
+       bool ___ok = ___err != 0;                                       \
        CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res);      \
        ___ok;                                                          \
 })
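
The ASSERT_*_PTR rework tracks the matching v1.0 convention for
pointer-returning APIs: errors are no longer encoded into the pointer for
IS_ERR() to decode, so callers extract them with libbpf_get_error(). A
hedged sketch of the pattern (the object path is hypothetical):

    #include <bpf/libbpf.h>
    #include <stdio.h>

    static int open_obj(void)
    {
            struct bpf_object *obj;
            long err;

            obj = bpf_object__open("prog.o");       /* hypothetical file */
            err = libbpf_get_error(obj);            /* 0 on success, -Exxx */
            if (err) {
                    fprintf(stderr, "open failed: %ld\n", err);
                    return (int)err;
            }
            bpf_object__close(obj);
            return 0;
    }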
index 73da7fe..4a39304 100644 (file)
@@ -82,6 +82,8 @@ int main(int argc, char **argv)
        cpu_set_t cpuset;
        __u32 key = 0;
 
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        CPU_ZERO(&cpuset);
        CPU_SET(0, &cpuset);
        pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
@@ -116,7 +118,7 @@ int main(int argc, char **argv)
 
        pb_opts.sample_cb = dummyfn;
        pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
-       if (IS_ERR(pb))
+       if (!pb)
                goto err;
 
        pthread_create(&tid, NULL, poller_thread, pb);
@@ -163,7 +165,6 @@ err:
        bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
        close(cg_fd);
        cleanup_cgroup_environment();
-       if (!IS_ERR_OR_NULL(pb))
-               perf_buffer__free(pb);
+       perf_buffer__free(pb);
        return error;
 }
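
This hunk shows the same convention from the caller's side: in strict mode
perf_buffer__new() returns NULL with the error in errno rather than an
ERR_PTR, and perf_buffer__free() tolerates NULL, which is why the
IS_ERR_OR_NULL() guard can simply go away. A minimal sketch under those
assumptions (callback name and poll timeout are placeholders):

    #include <bpf/libbpf.h>
    #include <errno.h>
    #include <stdio.h>

    static void on_sample(void *ctx, int cpu, void *data, __u32 size)
    {
            /* hypothetical per-sample callback */
    }

    static int poll_once(int map_fd)
    {
            struct perf_buffer_opts pb_opts = {};
            struct perf_buffer *pb;
            int err;

            pb_opts.sample_cb = on_sample;
            pb = perf_buffer__new(map_fd, 8 /* pages per CPU */, &pb_opts);
            if (!pb) {
                    err = -errno;   /* NULL on error, errno carries the code */
                    fprintf(stderr, "perf_buffer__new: %d\n", err);
                    return err;
            }
            err = perf_buffer__poll(pb, 100 /* ms */);
            perf_buffer__free(pb);  /* also safe on NULL, no guard needed */
            return err;
    }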
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
new file mode 100755 (executable)
index 0000000..1538373
--- /dev/null
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test topology:
+#     - - - - - - - - - - - - - - - - - - - - - - - - -
+#    | veth1         veth2         veth3 |  ... init net
+#     - -| - - - - - - | - - - - - - | - -
+#    ---------     ---------     ---------
+#    | veth0 |     | veth0 |     | veth0 |  ...
+#    ---------     ---------     ---------
+#       ns1           ns2           ns3
+#
+# Test modules:
+# XDP modes: generic, native, native + egress_prog
+#
+# Test cases:
+#   ARP: Testing BPF_F_BROADCAST, the ingress interface should also receive
+#   the redirects.
+#      ns1 -> gw: ns1, ns2 and ns3 should receive the ARP request
+#   IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
+#   interface should not receive the redirects.
+#      ns1 -> gw: ns1 should not receive the redirects; ns2 and ns3 should.
+#   IPv6: Testing no flags, all the packets should be redirected back
+#      ping test: ns1 -> ns2 (block), echo requests will be redirected back
+#   egress_prog:
+#      all src MACs should be the egress interface's MAC
+
+# netns numbers
+NUM=3
+IFACES=""
+DRV_MODE="xdpgeneric xdpdrv xdpegress"
+PASS=0
+FAIL=0
+
+test_pass()
+{
+       echo "Pass: $@"
+       PASS=$((PASS + 1))
+}
+
+test_fail()
+{
+       echo "fail: $@"
+       FAIL=$((FAIL + 1))
+}
+
+clean_up()
+{
+       for i in $(seq $NUM); do
+               ip link del veth$i 2> /dev/null
+               ip netns del ns$i 2> /dev/null
+       done
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_env()
+{
+       ip link set dev lo xdpgeneric off &>/dev/null
+       if [ $? -ne 0 ]; then
+               echo "selftests: [SKIP] Could not run test without ip xdpgeneric support"
+               exit 4
+       fi
+
+       which tcpdump &>/dev/null
+       if [ $? -ne 0 ]; then
+               echo "selftests: [SKIP] Could not run test without tcpdump"
+               exit 4
+       fi
+}
+
+setup_ns()
+{
+       local mode=$1
+       IFACES=""
+
+       if [ "$mode" = "xdpegress" ]; then
+               mode="xdpdrv"
+       fi
+
+       for i in $(seq $NUM); do
+               ip netns add ns$i
+               ip link add veth$i type veth peer name veth0 netns ns$i
+               ip link set veth$i up
+               ip -n ns$i link set veth0 up
+
+               ip -n ns$i addr add 192.0.2.$i/24 dev veth0
+               ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+               # Add a neigh entry for IPv4 ping test
+               ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+               ip -n ns$i link set veth0 $mode obj \
+                       xdp_dummy.o sec xdp_dummy &> /dev/null || \
+                       { test_fail "Unable to load dummy xdp" && exit 1; }
+               IFACES="$IFACES veth$i"
+               veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}')
+       done
+}
+
+do_egress_tests()
+{
+       local mode=$1
+
+       # mac test
+       ip netns exec ns2 tcpdump -e -i veth0 -nn -l &> mac_ns1-2_${mode}.log &
+       ip netns exec ns3 tcpdump -e -i veth0 -nn -l &> mac_ns1-3_${mode}.log &
+       sleep 0.5
+       ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+       sleep 0.5
+       pkill -9 tcpdump
+
+       # mac check
+       grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" mac_ns1-2_${mode}.log && \
+              test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
+       grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" mac_ns1-3_${mode}.log && \
+               test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
+}
+
+do_ping_tests()
+{
+       local mode=$1
+
+       # ping6 test: echo request should be redirect back to itself, not others
+       ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+
+       ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ns1-1_${mode}.log &
+       ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ns1-2_${mode}.log &
+       ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ns1-3_${mode}.log &
+       sleep 0.5
+       # ARP test
+       ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+       # IPv4 test
+       ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+       # IPv6 test
+       ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+       sleep 0.5
+       pkill -9 tcpdump
+
+       # All netns should receive the redirected ARP requests
+       [ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ] && \
+               test_pass "$mode arp(F_BROADCAST) ns1-1" || \
+               test_fail "$mode arp(F_BROADCAST) ns1-1"
+       [ $(grep -c "who-has 192.0.2.254" ns1-2_${mode}.log) -le 4 ] && \
+               test_pass "$mode arp(F_BROADCAST) ns1-2" || \
+               test_fail "$mode arp(F_BROADCAST) ns1-2"
+       [ $(grep -c "who-has 192.0.2.254" ns1-3_${mode}.log) -le 4 ] && \
+               test_pass "$mode arp(F_BROADCAST) ns1-3" || \
+               test_fail "$mode arp(F_BROADCAST) ns1-3"
+
+       # ns1 should not receive the redirected echo requests, the others should
+       [ $(grep -c "ICMP echo request" ns1-1_${mode}.log) -eq 4 ] && \
+               test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
+               test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
+       [ $(grep -c "ICMP echo request" ns1-2_${mode}.log) -eq 4 ] && \
+               test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
+               test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
+       [ $(grep -c "ICMP echo request" ns1-3_${mode}.log) -eq 4 ] && \
+               test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
+               test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
+
+       # ns1 should receive the echo request, ns2 should not
+       [ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ] && \
+               test_pass "$mode IPv6 (no flags) ns1-1" || \
+               test_fail "$mode IPv6 (no flags) ns1-1"
+       [ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ] && \
+               test_pass "$mode IPv6 (no flags) ns1-2" || \
+               test_fail "$mode IPv6 (no flags) ns1-2"
+}
+
+do_tests()
+{
+       local mode=$1
+       local drv_p
+
+       case ${mode} in
+               xdpdrv)  drv_p="-N";;
+               xdpegress) drv_p="-X";;
+               xdpgeneric) drv_p="-S";;
+       esac
+
+       ./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log &
+       xdp_pid=$!
+       sleep 1
+
+       if [ "$mode" = "xdpegress" ]; then
+               do_egress_tests $mode
+       else
+               do_ping_tests $mode
+       fi
+
+       kill $xdp_pid
+}
+
+trap clean_up 0 2 3 6 9
+
+check_env
+rm -f xdp_redirect_*.log ns*.log mac_ns*.log
+
+for mode in ${DRV_MODE}; do
+       setup_ns $mode
+       do_tests $mode
+       clean_up
+done
+
+echo "Summary: PASS $PASS, FAIL $FAIL"
+[ $FAIL -eq 0 ] && exit 0 || exit 1
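
The BPF side this script exercises reduces to one bpf_redirect_map() call
with the new flags from this series: BPF_F_BROADCAST clones the frame to
every entry of the device map, and BPF_F_EXCLUDE_INGRESS suppresses the
copy for the interface the packet arrived on. A hedged sketch of such an
ingress program, not the exact selftest source (map sizing and section
name are assumptions):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
            __uint(key_size, sizeof(int));
            __uint(value_size, sizeof(struct bpf_devmap_val));
            __uint(max_entries, 32);
    } map_all SEC(".maps");

    SEC("xdp")
    int xdp_redirect_map_all_prog(struct xdp_md *ctx)
    {
            /* Key is ignored with BPF_F_BROADCAST: redirect to all devmap
             * entries except the ingress interface. */
            return bpf_redirect_map(&map_all, 0,
                                    BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
    }

    char _license[] SEC("license") = "GPL";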
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
new file mode 100644 (file)
index 0000000..3696a8f
--- /dev/null
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+#define MAX_INDEX_NUM 1024
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+
+static void int_exit(int sig)
+{
+       __u32 prog_id = 0;
+       int i;
+
+       for (i = 0; ifaces[i] > 0; i++) {
+               if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+                       printf("bpf_get_link_xdp_id failed\n");
+                       exit(1);
+               }
+               if (prog_id)
+                       bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+       }
+
+       exit(0);
+}
+
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+       char ifname[IF_NAMESIZE];
+       struct ifreq ifr;
+       int fd, ret = -1;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0)
+               return ret;
+
+       if (!if_indextoname(ifindex, ifname))
+               goto err_out;
+
+       strcpy(ifr.ifr_name, ifname);
+
+       if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
+               goto err_out;
+
+       memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+       ret = 0;
+
+err_out:
+       close(fd);
+       return ret;
+}
+
+static void usage(const char *prog)
+{
+       fprintf(stderr,
+               "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
+               "OPTS:\n"
+               "    -S    use skb-mode\n"
+               "    -N    enforce native mode\n"
+               "    -F    force loading prog\n"
+               "    -X    load xdp program on egress\n",
+               prog);
+}
+
+int main(int argc, char **argv)
+{
+       int prog_fd, group_all, mac_map;
+       struct bpf_program *ingress_prog, *egress_prog;
+       struct bpf_prog_load_attr prog_load_attr = {
+               .prog_type = BPF_PROG_TYPE_UNSPEC,
+       };
+       int i, ret, opt, egress_prog_fd = 0;
+       struct bpf_devmap_val devmap_val;
+       bool attach_egress_prog = false;
+       unsigned char mac_addr[6];
+       char ifname[IF_NAMESIZE];
+       struct bpf_object *obj;
+       unsigned int ifindex;
+       char filename[256];
+
+       while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+               switch (opt) {
+               case 'S':
+                       xdp_flags |= XDP_FLAGS_SKB_MODE;
+                       break;
+               case 'N':
+                       /* default, set below */
+                       break;
+               case 'F':
+                       xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+                       break;
+               case 'X':
+                       attach_egress_prog = true;
+                       break;
+               default:
+                       usage(basename(argv[0]));
+                       return 1;
+               }
+       }
+
+       if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+               xdp_flags |= XDP_FLAGS_DRV_MODE;
+       } else if (attach_egress_prog) {
+               printf("Load xdp program on egress with SKB mode not supported yet\n");
+               goto err_out;
+       }
+
+       if (optind == argc) {
+               printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+               goto err_out;
+       }
+
+       printf("Get interfaces");
+       for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+               ifaces[i] = if_nametoindex(argv[optind + i]);
+               if (!ifaces[i])
+                       ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+               if (!if_indextoname(ifaces[i], ifname)) {
+                       perror("Invalid interface name or i");
+                       goto err_out;
+               }
+               if (ifaces[i] > MAX_INDEX_NUM) {
+                       printf("Interface index to large\n");
+                       goto err_out;
+               }
+               printf(" %d", ifaces[i]);
+       }
+       printf("\n");
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       prog_load_attr.file = filename;
+
+       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+               goto err_out;
+
+       if (attach_egress_prog)
+               group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
+       else
+               group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
+       mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
+
+       if (group_all < 0 || mac_map < 0) {
+               printf("bpf_object__find_map_fd_by_name failed\n");
+               goto err_out;
+       }
+
+       if (attach_egress_prog) {
+               /* Find ingress/egress prog for 2nd xdp prog */
+               ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
+               egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+               if (!ingress_prog || !egress_prog) {
+                       printf("finding ingress/egress_prog in obj file failed\n");
+                       goto err_out;
+               }
+               prog_fd = bpf_program__fd(ingress_prog);
+               egress_prog_fd = bpf_program__fd(egress_prog);
+               if (prog_fd < 0 || egress_prog_fd < 0) {
+                       printf("find egress_prog fd failed\n");
+                       goto err_out;
+               }
+       }
+
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+
+       /* Init forward multicast groups and exclude group */
+       for (i = 0; ifaces[i] > 0; i++) {
+               ifindex = ifaces[i];
+
+               if (attach_egress_prog) {
+                       ret = get_mac_addr(ifindex, mac_addr);
+                       if (ret < 0) {
+                               printf("get interface %d mac failed\n", ifindex);
+                               goto err_out;
+                       }
+                       ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
+                       if (ret) {
+                               perror("bpf_update_elem mac_map failed\n");
+                               goto err_out;
+                       }
+               }
+
+               /* Add all the interfaces to group all */
+               devmap_val.ifindex = ifindex;
+               devmap_val.bpf_prog.fd = egress_prog_fd;
+               ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
+               if (ret) {
+                       perror("bpf_map_update_elem");
+                       goto err_out;
+               }
+
+               /* bind prog_fd to each interface */
+               ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
+               if (ret) {
+                       printf("Set xdp fd failed on %d\n", ifindex);
+                       goto err_out;
+               }
+       }
+
+       /* sleep some time for testing */
+       sleep(999);
+
+       return 0;
+
+err_out:
+       return 1;
+}
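
For the -X/egress path, the loader stores egress_prog_fd in each
bpf_devmap_val entry, and the shell script's mac check expects that devmap
program to stamp the egress interface's address into the source MAC via
mac_map. A hedged sketch of that egress side (section name, map layout and
bounds handling are assumptions, not the exact selftest source):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(key_size, sizeof(__u32));
            __uint(value_size, ETH_ALEN);
            __uint(max_entries, 32);
    } mac_map SEC(".maps");

    SEC("xdp_devmap")
    int xdp_devmap_prog(struct xdp_md *ctx)
    {
            void *data_end = (void *)(long)ctx->data_end;
            void *data = (void *)(long)ctx->data;
            __u32 key = ctx->egress_ifindex;
            struct ethhdr *eth = data;
            unsigned char *mac;

            if (data + sizeof(*eth) > data_end)
                    return XDP_DROP;

            /* Rewrite the source MAC to the egress interface's address. */
            mac = bpf_map_lookup_elem(&mac_map, &key);
            if (mac)
                    __builtin_memcpy(eth->h_source, mac, ETH_ALEN);

            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";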