Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
author David S. Miller <davem@davemloft.net>
Thu, 17 Jun 2021 18:54:56 +0000 (11:54 -0700)
committer David S. Miller <davem@davemloft.net>
Thu, 17 Jun 2021 18:54:56 +0000 (11:54 -0700)
Daniel Borkmann says:

====================
pull-request: bpf-next 2021-06-17

The following pull-request contains BPF updates for your *net-next* tree.

We've added 50 non-merge commits during the last 25 day(s) which contain
a total of 148 files changed, 4779 insertions(+), 1248 deletions(-).

The main changes are:

1) BPF infrastructure to migrate TCP child sockets from one listener to
   another in the same reuseport group/map, from Kuniyuki Iwashima.

2) Add a provably sound, faster and more precise algorithm for tnum_mul(),
   as described in https://arxiv.org/abs/2105.05398, from Harishankar Vishwanathan.

3) Streamline libbpf's error reporting as planned out in the
   'libbpf: the road to v1.0' effort, from Andrii Nakryiko.

4) Add broadcast support to xdp_redirect_map(), from Hangbin Liu; see the
   usage sketch after this summary.

5) Extend bpf_map_lookup_and_delete_elem() functionality to four more map
   types, that is, {LRU_,PERCPU_,LRU_PERCPU_,}HASH, from Denis Salopek.

6) Support new LLVM relocations in libbpf to make them more linker friendly;
   also add a document describing the BPF backend relocations, from Yonghong Song.

7) Silence long-standing KUBSAN complaints on register-based shifts in the
   interpreter, from Daniel Borkmann and Eric Biggers.

8) Add dummy PT_REGS macros in libbpf to fail BPF program compilation when
   the target arch cannot be determined, from Lorenz Bauer.

9) Extend AF_XDP to support large umems with 1M+ pages, from Magnus Karlsson.

10) Fix two minor libbpf tc BPF API issues, from Kumar Kartikeya Dwivedi.

11) Move the libbpf BPF_SEQ_PRINTF/BPF_SNPRINTF macros that can be used by
    BPF programs to the bpf_helpers.h header, from Florent Revest.
====================
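
For 4), the following is a minimal sketch of how an XDP program might use
the new broadcast flags (map name and sizing are illustrative, modeled on
the new samples/bpf/xdp_redirect_map_multi_kern.c):

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
          __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
          __uint(key_size, sizeof(int));
          __uint(value_size, sizeof(int));
          __uint(max_entries, 32);
  } forward_map SEC(".maps");

  SEC("xdp")
  int xdp_redirect_broadcast(struct xdp_md *ctx)
  {
          /* Clone the frame to every device in forward_map except the
           * interface the packet arrived on.
           */
          return bpf_redirect_map(&forward_map, 0,
                                  BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
  }

  char _license[] SEC("license") = "GPL";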

Signed-off-by: David S. Miller <davem@davemloft.net>
146 files changed:
Documentation/bpf/index.rst
Documentation/bpf/llvm_reloc.rst [new file with mode: 0644]
Documentation/networking/ip-sysctl.rst
include/linux/bpf.h
include/linux/bpf_local_storage.h
include/linux/filter.h
include/net/netns/ipv4.h
include/net/sock_reuseport.h
include/net/xdp.h
include/trace/events/xdp.h
include/uapi/linux/bpf.h
kernel/bpf/bpf_inode_storage.c
kernel/bpf/bpf_lsm.c
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/cpumap.c
kernel/bpf/devmap.c
kernel/bpf/hashtab.c
kernel/bpf/preload/iterators/iterators.bpf.c
kernel/bpf/reuseport_array.c
kernel/bpf/syscall.c
kernel/bpf/tnum.c
kernel/bpf/trampoline.c
kernel/bpf/verifier.c
net/core/filter.c
net/core/sock_reuseport.c
net/core/xdp.c
net/ipv4/inet_connection_sock.c
net/ipv4/inet_hashtables.c
net/ipv4/sysctl_net_ipv4.c
net/ipv4/tcp_ipv4.c
net/ipv4/tcp_minisocks.c
net/ipv6/tcp_ipv6.c
net/xdp/xdp_umem.c
net/xdp/xskmap.c
samples/bpf/Makefile
samples/bpf/ibumad_kern.c
samples/bpf/ibumad_user.c
samples/bpf/xdp_fwd_user.c
samples/bpf/xdp_redirect_map_multi_kern.c [new file with mode: 0644]
samples/bpf/xdp_redirect_map_multi_user.c [new file with mode: 0644]
samples/bpf/xdp_sample_pkts_user.c
tools/bpf/bpftool/Makefile
tools/bpf/bpftool/gen.c
tools/bpf/bpftool/main.c
tools/include/uapi/linux/bpf.h
tools/lib/bpf/Makefile
tools/lib/bpf/bpf.c
tools/lib/bpf/bpf.h
tools/lib/bpf/bpf_helpers.h
tools/lib/bpf/bpf_prog_linfo.c
tools/lib/bpf/bpf_tracing.h
tools/lib/bpf/btf.c
tools/lib/bpf/btf_dump.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/libbpf.h
tools/lib/bpf/libbpf.map
tools/lib/bpf/libbpf_errno.c
tools/lib/bpf/libbpf_internal.h
tools/lib/bpf/libbpf_legacy.h [new file with mode: 0644]
tools/lib/bpf/linker.c
tools/lib/bpf/netlink.c
tools/lib/bpf/ringbuf.c
tools/testing/selftests/bpf/.gitignore
tools/testing/selftests/bpf/Makefile
tools/testing/selftests/bpf/Makefile.docs
tools/testing/selftests/bpf/README.rst
tools/testing/selftests/bpf/bench.c
tools/testing/selftests/bpf/benchs/bench_rename.c
tools/testing/selftests/bpf/benchs/bench_ringbufs.c
tools/testing/selftests/bpf/benchs/bench_trigger.c
tools/testing/selftests/bpf/prog_tests/attach_probe.c
tools/testing/selftests/bpf/prog_tests/bpf_iter.c
tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
tools/testing/selftests/bpf/prog_tests/btf.c
tools/testing/selftests/bpf/prog_tests/btf_dump.c
tools/testing/selftests/bpf/prog_tests/btf_write.c
tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
tools/testing/selftests/bpf/prog_tests/cgroup_link.c
tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
tools/testing/selftests/bpf/prog_tests/check_mtu.c
tools/testing/selftests/bpf/prog_tests/core_reloc.c
tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/flow_dissector.c
tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
tools/testing/selftests/bpf/prog_tests/hashmap.c
tools/testing/selftests/bpf/prog_tests/kfree_skb.c
tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
tools/testing/selftests/bpf/prog_tests/link_pinning.c
tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c [new file with mode: 0644]
tools/testing/selftests/bpf/prog_tests/obj_name.c
tools/testing/selftests/bpf/prog_tests/perf_branches.c
tools/testing/selftests/bpf/prog_tests/perf_buffer.c
tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
tools/testing/selftests/bpf/prog_tests/probe_user.c
tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
tools/testing/selftests/bpf/prog_tests/reference_tracking.c
tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
tools/testing/selftests/bpf/prog_tests/select_reuseport.c
tools/testing/selftests/bpf/prog_tests/send_signal.c
tools/testing/selftests/bpf/prog_tests/sk_lookup.c
tools/testing/selftests/bpf/prog_tests/sock_fields.c
tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
tools/testing/selftests/bpf/prog_tests/test_overhead.c
tools/testing/selftests/bpf/prog_tests/trampoline_count.c
tools/testing/selftests/bpf/prog_tests/udp_limit.c
tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
tools/testing/selftests/bpf/prog_tests/xdp_link.c
tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
tools/testing/selftests/bpf/progs/bpf_iter_task.c
tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
tools/testing/selftests/bpf/progs/test_lookup_and_delete.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_migrate_reuseport.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/test_snprintf.c
tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c [new file with mode: 0644]
tools/testing/selftests/bpf/test_doc_build.sh
tools/testing/selftests/bpf/test_lru_map.c
tools/testing/selftests/bpf/test_maps.c
tools/testing/selftests/bpf/test_progs.c
tools/testing/selftests/bpf/test_progs.h
tools/testing/selftests/bpf/test_tcpnotify_user.c
tools/testing/selftests/bpf/test_xdp_redirect_multi.sh [new file with mode: 0755]
tools/testing/selftests/bpf/xdp_redirect_multi.c [new file with mode: 0644]

index a702f67..93e8cf1 100644 (file)
@@ -84,6 +84,7 @@ Other
    :maxdepth: 1
 
    ringbuf
+   llvm_reloc
 
 .. Links:
 .. _networking-filter: ../networking/filter.rst
diff --git a/Documentation/bpf/llvm_reloc.rst b/Documentation/bpf/llvm_reloc.rst
new file mode 100644 (file)
index 0000000..ca8957d
--- /dev/null
@@ -0,0 +1,240 @@
+.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+====================
+BPF LLVM Relocations
+====================
+
+This document describes LLVM BPF backend relocation types.
+
+Relocation Record
+=================
+
+The LLVM BPF backend records each relocation with the following 16-byte
+ELF structure::
+
+  typedef struct
+  {
+    Elf64_Addr    r_offset;  // Offset from the beginning of section.
+    Elf64_Xword   r_info;    // Relocation type and symbol index.
+  } Elf64_Rel;
+
+For example, for the following code::
+
+  int g1 __attribute__((section("sec")));
+  int g2 __attribute__((section("sec")));
+  static volatile int l1 __attribute__((section("sec")));
+  static volatile int l2 __attribute__((section("sec")));
+  int test() {
+    return g1 + g2 + l1 + l2;
+  }
+
+Compiled with ``clang -target bpf -O2 -c test.c``, the following is
+the code as disassembled by ``llvm-objdump -dr test.o``::
+
+       0:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
+                0000000000000000:  R_BPF_64_64  g1
+       2:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+       3:       18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll
+                0000000000000018:  R_BPF_64_64  g2
+       5:       61 20 00 00 00 00 00 00 r0 = *(u32 *)(r2 + 0)
+       6:       0f 10 00 00 00 00 00 00 r0 += r1
+       7:       18 01 00 00 08 00 00 00 00 00 00 00 00 00 00 00 r1 = 8 ll
+                0000000000000038:  R_BPF_64_64  sec
+       9:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+      10:       0f 10 00 00 00 00 00 00 r0 += r1
+      11:       18 01 00 00 0c 00 00 00 00 00 00 00 00 00 00 00 r1 = 12 ll
+                0000000000000058:  R_BPF_64_64  sec
+      13:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+      14:       0f 10 00 00 00 00 00 00 r0 += r1
+      15:       95 00 00 00 00 00 00 00 exit
+
+There are four relocations above, one for each of the four ``LD_imm64``
+instructions. The following ``llvm-readelf -r test.o`` output shows the
+binary values of the four relocations::
+
+  Relocation section '.rel.text' at offset 0x190 contains 4 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000000  0000000600000001 R_BPF_64_64            0000000000000000 g1
+  0000000000000018  0000000700000001 R_BPF_64_64            0000000000000004 g2
+  0000000000000038  0000000400000001 R_BPF_64_64            0000000000000000 sec
+  0000000000000058  0000000400000001 R_BPF_64_64            0000000000000000 sec
+
+Each relocation is represented by ``Offset`` (8 bytes) and ``Info`` (8 bytes).
+For example, the first relocation corresponds to the first instruction
+(Offset 0x0), and its ``Info`` encodes the relocation type
+``R_BPF_64_64`` (type 1) and the symbol table entry (entry 6).
+The following is the symbol table with ``llvm-readelf -s test.o``::
+
+  Symbol table '.symtab' contains 8 entries:
+     Num:    Value          Size Type    Bind   Vis       Ndx Name
+       0: 0000000000000000     0 NOTYPE  LOCAL  DEFAULT   UND
+       1: 0000000000000000     0 FILE    LOCAL  DEFAULT   ABS test.c
+       2: 0000000000000008     4 OBJECT  LOCAL  DEFAULT     4 l1
+       3: 000000000000000c     4 OBJECT  LOCAL  DEFAULT     4 l2
+       4: 0000000000000000     0 SECTION LOCAL  DEFAULT     4 sec
+       5: 0000000000000000   128 FUNC    GLOBAL DEFAULT     2 test
+       6: 0000000000000000     4 OBJECT  GLOBAL DEFAULT     4 g1
+       7: 0000000000000004     4 OBJECT  GLOBAL DEFAULT     4 g2
+
+The 6th entry is global variable ``g1`` with value 0.
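+
+As a cross-check, the ``Info`` word splits into the symbol table index
+(upper 32 bits) and the relocation type (lower 32 bits). A minimal
+host-side C sketch using the standard ``<elf.h>`` macros (illustrative
+only, not part of the BPF object)::
+
+  #include <elf.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+      /* Info word of the first relocation above. */
+      Elf64_Xword info = 0x0000000600000001;
+
+      printf("symbol index: %lu, type: %lu\n",
+             (unsigned long)ELF64_R_SYM(info),   /* 6 -> g1 */
+             (unsigned long)ELF64_R_TYPE(info)); /* 1 -> R_BPF_64_64 */
+      return 0;
+  }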
+
+Similarly, the second relocation is at ``.text`` offset ``0x18``, instruction 3,
+for the global variable ``g2``, which has symbol value 4, the offset
+from the start of the ``sec`` section.
+
+The third and fourth relocations refer to the static variables ``l1``
+and ``l2``. From the ``.rel.text`` section above, it is not clear
+which symbols they really refer to, as they both refer to
+symbol table entry 4, symbol ``sec``, which has type ``STT_SECTION``
+and represents a section. So for a static variable or function,
+the section offset is written to the original insn
+buffer; this value is called ``A`` (the addend). Looking at
+insns ``7`` and ``11`` above, they have section offsets ``8`` and ``12``.
+From the symbol table, we can see that these correspond to entries ``2``
+and ``3``, i.e. ``l1`` and ``l2``.
+
+In general, ``A`` is 0 for global variables and functions, and is
+the section offset, or some value computed from the section offset,
+for static variables/functions. The non-section-offset
+case applies to function calls; see below for more details.
+
+Different Relocation Types
+==========================
+
+Six relocation types are supported. The following is an overview, where
+``S`` represents the value of the symbol in the symbol table::
+
+  Enum  ELF Reloc Type     Description      BitSize  Offset        Calculation
+  0     R_BPF_NONE         None
+  1     R_BPF_64_64        ld_imm64 insn    32       r_offset + 4  S + A
+  2     R_BPF_64_ABS64     normal data      64       r_offset      S + A
+  3     R_BPF_64_ABS32     normal data      32       r_offset      S + A
+  4     R_BPF_64_NODYLD32  .BTF[.ext] data  32       r_offset      S + A
+  10    R_BPF_64_32        call insn        32       r_offset + 4  (S + A) / 8 - 1
+
+For example, the ``R_BPF_64_64`` relocation type is used for ``ld_imm64``
+instructions. The actual to-be-relocated data (0 or the section offset)
+is stored at ``r_offset + 4`` and the read/write
+data bitsize is 32 (4 bytes). The relocation is resolved with
+the symbol value plus the implicit addend. Note that ``BitSize`` is 32, which
+means the section offset must be less than or equal to ``UINT32_MAX``; this
+is enforced by the LLVM BPF backend.
+
+The ``R_BPF_64_ABS64`` relocation type is used for normal 64-bit data.
+The actual to-be-relocated data is stored at ``r_offset`` and the read/write data
+bitsize is 64 (8 bytes). The relocation is resolved with
+the symbol value plus the implicit addend.
+
+Both the ``R_BPF_64_ABS32`` and ``R_BPF_64_NODYLD32`` types are for 32-bit data,
+but ``R_BPF_64_NODYLD32`` specifically refers to relocations in the ``.BTF`` and
+``.BTF.ext`` sections. For cases like bcc, where the LLVM ``ExecutionEngine``
+``RuntimeDyld`` is involved, ``R_BPF_64_NODYLD32`` relocations should not be resolved
+to an actual function/variable address; otherwise, ``.BTF`` and ``.BTF.ext``
+become unusable by bcc and the kernel.
+
+The ``R_BPF_64_32`` type is used for call instructions. The call target's section
+offset is stored at ``r_offset + 4`` (32-bit) and is calculated as
+``(S + A) / 8 - 1``.
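+
+To make the ``Calculation`` column concrete, the following is a minimal
+sketch of how a hypothetical loader could apply the two instruction
+relocation types (illustrative only, not how libbpf implements it)::
+
+  #include <stdint.h>
+  #include <string.h>
+
+  /* sec points at a writable copy of the section being relocated and
+   * sym_value is S, the symbol value from the symbol table. Only the
+   * low 32 bits are touched, per the BitSize column above.
+   */
+  static void apply_bpf_reloc(uint8_t *sec, uint64_t r_offset,
+                              uint32_t type, uint32_t sym_value)
+  {
+      uint32_t imm;
+
+      memcpy(&imm, sec + r_offset + 4, sizeof(imm)); /* implicit addend A */
+      if (type == 1)         /* R_BPF_64_64: ld_imm64, S + A */
+          imm = sym_value + imm;
+      else if (type == 10)   /* R_BPF_64_32: call, (S + A) / 8 - 1 */
+          imm = (sym_value + imm) / 8 - 1;
+      memcpy(sec + r_offset + 4, &imm, sizeof(imm));
+  }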
+
+Examples
+========
+
+Types ``R_BPF_64_64`` and ``R_BPF_64_32`` are used to resolve ``ld_imm64``
+and ``call`` instructions. For example::
+
+  __attribute__((noinline)) __attribute__((section("sec1")))
+  int gfunc(int a, int b) {
+    return a * b;
+  }
+  static __attribute__((noinline)) __attribute__((section("sec1")))
+  int lfunc(int a, int b) {
+    return a + b;
+  }
+  int global __attribute__((section("sec2")));
+  int test(int a, int b) {
+    return gfunc(a, b) +  lfunc(a, b) + global;
+  }
+
+Compiled with ``clang -target bpf -O2 -c test.c``, we get the
+following code with ``llvm-objdump -dr test.o``::
+
+  Disassembly of section .text:
+
+  0000000000000000 <test>:
+         0:       bf 26 00 00 00 00 00 00 r6 = r2
+         1:       bf 17 00 00 00 00 00 00 r7 = r1
+         2:       85 10 00 00 ff ff ff ff call -1
+                  0000000000000010:  R_BPF_64_32  gfunc
+         3:       bf 08 00 00 00 00 00 00 r8 = r0
+         4:       bf 71 00 00 00 00 00 00 r1 = r7
+         5:       bf 62 00 00 00 00 00 00 r2 = r6
+         6:       85 10 00 00 02 00 00 00 call 2
+                  0000000000000030:  R_BPF_64_32  sec1
+         7:       0f 80 00 00 00 00 00 00 r0 += r8
+         8:       18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll
+                  0000000000000040:  R_BPF_64_64  global
+        10:       61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0)
+        11:       0f 10 00 00 00 00 00 00 r0 += r1
+        12:       95 00 00 00 00 00 00 00 exit
+
+  Disassembly of section sec1:
+
+  0000000000000000 <gfunc>:
+         0:       bf 20 00 00 00 00 00 00 r0 = r2
+         1:       2f 10 00 00 00 00 00 00 r0 *= r1
+         2:       95 00 00 00 00 00 00 00 exit
+
+  0000000000000018 <lfunc>:
+         3:       bf 20 00 00 00 00 00 00 r0 = r2
+         4:       0f 10 00 00 00 00 00 00 r0 += r1
+         5:       95 00 00 00 00 00 00 00 exit
+
+The first relocation corresponds to ``gfunc(a, b)``, where ``gfunc`` has a value of 0,
+so the ``call`` instruction offset is ``(0 + 0)/8 - 1 = -1``.
+The second relocation corresponds to ``lfunc(a, b)``, where ``lfunc`` has a section
+offset of ``0x18``, so the ``call`` instruction offset is ``(0 + 0x18)/8 - 1 = 2``.
+The third relocation corresponds to the ``ld_imm64`` of ``global``, which has a
+section offset of ``0``.
+
+The following example shows how ``R_BPF_64_ABS64`` could be generated::
+
+  int global() { return 0; }
+  struct t { void *g; } gbl = { global };
+
+Compiled with ``clang -target bpf -O2 -g -c test.c``, we see the
+following relocation in the ``.data`` section with
+``llvm-readelf -r test.o``::
+
+  Relocation section '.rel.data' at offset 0x458 contains 1 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000000  0000000700000002 R_BPF_64_ABS64         0000000000000000 global
+
+The relocation says the first 8 bytes of the ``.data`` section should be
+filled with the address of ``global``.
+
+From the ``llvm-readelf`` output, we can see that the DWARF sections have a
+number of ``R_BPF_64_ABS32`` and ``R_BPF_64_ABS64`` relocations::
+
+  Relocation section '.rel.debug_info' at offset 0x468 contains 13 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000006  0000000300000003 R_BPF_64_ABS32         0000000000000000 .debug_abbrev
+  000000000000000c  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  0000000000000012  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  0000000000000016  0000000600000003 R_BPF_64_ABS32         0000000000000000 .debug_line
+  000000000000001a  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  000000000000001e  0000000200000002 R_BPF_64_ABS64         0000000000000000 .text
+  000000000000002b  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  0000000000000037  0000000800000002 R_BPF_64_ABS64         0000000000000000 gbl
+  0000000000000040  0000000400000003 R_BPF_64_ABS32         0000000000000000 .debug_str
+  ......
+
+The ``.BTF``/``.BTF.ext`` sections have ``R_BPF_64_NODYLD32`` relocations::
+
+  Relocation section '.rel.BTF' at offset 0x538 contains 1 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  0000000000000084  0000000800000004 R_BPF_64_NODYLD32      0000000000000000 gbl
+
+  Relocation section '.rel.BTF.ext' at offset 0x548 contains 2 entries:
+      Offset             Info             Type               Symbol's Value  Symbol's Name
+  000000000000002c  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
+  0000000000000040  0000000200000004 R_BPF_64_NODYLD32      0000000000000000 .text
index a5c2500..b0436d3 100644 (file)
@@ -761,6 +761,31 @@ tcp_syncookies - INTEGER
        network connections you can set this knob to 2 to enable
        unconditionally generation of syncookies.
 
+tcp_migrate_req - BOOLEAN
+       The incoming connection is tied to a specific listening socket when
+       the initial SYN packet is received during the three-way handshake.
+       When a listener is closed, the request sockets still in the handshake
+       and the established sockets in the accept queue are aborted.
+
+       If the listener has SO_REUSEPORT enabled, other listeners on the
+       same port could have accepted such connections instead. This
+       option makes it possible to migrate these child sockets to another
+       listener after close() or shutdown().
+
+       Usually, an eBPF program of the BPF_SK_REUSEPORT_SELECT_OR_MIGRATE
+       type should be used to define the policy for picking an alive
+       listener. Otherwise, if this option is enabled, the kernel randomly
+       picks an alive listener.
+
+       Note that migration between listeners with different settings may
+       crash applications. Say migration happens from listener A to
+       B, and only B has TCP_SAVE_SYN enabled: B cannot read SYN data from
+       the requests migrated from A. To avoid such a situation, cancel
+       migration by returning SK_DROP from that type of eBPF program, or
+       disable this option.
+
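+       A minimal sketch of such a BPF_SK_REUSEPORT_SELECT_OR_MIGRATE
+       program (illustrative only; the map layout and names are
+       hypothetical)::
+
+         #include <linux/bpf.h>
+         #include <bpf/bpf_helpers.h>
+
+         struct {
+                 __uint(type, BPF_MAP_TYPE_SOCKMAP);
+                 __uint(max_entries, 1);
+                 __type(key, int);
+                 __type(value, __u64);
+         } listeners SEC(".maps");
+
+         SEC("sk_reuseport/migrate")
+         int migrate_policy(struct sk_reuseport_md *md)
+         {
+                 int zero = 0;
+
+                 /* New SYN: let the kernel select as usual. */
+                 if (!md->migrating_sk)
+                         return SK_PASS;
+
+                 /* Migration: steer to the stored listener, or return
+                  * SK_DROP to cancel the migration.
+                  */
+                 if (bpf_sk_select_reuseport(md, &listeners, &zero, 0))
+                         return SK_DROP;
+
+                 return SK_PASS;
+         }
+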
+       Default: 0
+
 tcp_fastopen - INTEGER
        Enable TCP Fast Open (RFC7413) to send and accept data in the opening
        SYN packet.
index 9dc44ba..f309fc1 100644 (file)
@@ -70,6 +70,8 @@ struct bpf_map_ops {
        void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
        int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
                                union bpf_attr __user *uattr);
+       int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
+                                         void *value, u64 flags);
        int (*map_lookup_and_delete_batch)(struct bpf_map *map,
                                           const union bpf_attr *attr,
                                           union bpf_attr __user *uattr);
@@ -1499,8 +1501,13 @@ int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
                    struct net_device *dev_rx);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
                    struct net_device *dev_rx);
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+                         struct bpf_map *map, bool exclude_ingress);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
                             struct bpf_prog *xdp_prog);
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+                          struct bpf_prog *xdp_prog, struct bpf_map *map,
+                          bool exclude_ingress);
 bool dev_map_can_have_prog(struct bpf_map *map);
 
 void __cpu_map_flush(void);
@@ -1668,6 +1675,13 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
        return 0;
 }
 
+static inline
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+                         struct bpf_map *map, bool exclude_ingress)
+{
+       return 0;
+}
+
 struct sk_buff;
 
 static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
@@ -1677,6 +1691,14 @@ static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
        return 0;
 }
 
+static inline
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+                          struct bpf_prog *xdp_prog, struct bpf_map *map,
+                          bool exclude_ingress)
+{
+       return 0;
+}
+
 static inline void __cpu_map_flush(void)
 {
 }
@@ -2026,6 +2048,7 @@ struct sk_reuseport_kern {
        struct sk_buff *skb;
        struct sock *sk;
        struct sock *selected_sk;
+       struct sock *migrating_sk;
        void *data_end;
        u32 hash;
        u32 reuseport_id;
index b902c58..24496bc 100644 (file)
@@ -58,7 +58,7 @@ struct bpf_local_storage_data {
         * from the object's bpf_local_storage.
         *
         * Put it in the same cacheline as the data to minimize
-        * the number of cachelines access during the cache hit case.
+        * the number of cachelines accessed during the cache hit case.
         */
        struct bpf_local_storage_map __rcu *smap;
        u8 data[] __aligned(8);
@@ -71,7 +71,7 @@ struct bpf_local_storage_elem {
        struct bpf_local_storage __rcu *local_storage;
        struct rcu_head rcu;
        /* 8 bytes hole */
-       /* The data is stored in aother cacheline to minimize
+       /* The data is stored in another cacheline to minimize
         * the number of cachelines access during a cache hit.
         */
        struct bpf_local_storage_data sdata ____cacheline_aligned;
index 9a09547..688856e 100644 (file)
@@ -646,6 +646,7 @@ struct bpf_redirect_info {
        u32 flags;
        u32 tgt_index;
        void *tgt_value;
+       struct bpf_map *map;
        u32 map_id;
        enum bpf_map_type map_type;
        u32 kern_flags;
@@ -995,11 +996,13 @@ void bpf_warn_invalid_xdp_action(u32 act);
 #ifdef CONFIG_INET
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
                                  struct bpf_prog *prog, struct sk_buff *skb,
+                                 struct sock *migrating_sk,
                                  u32 hash);
 #else
 static inline struct sock *
 bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
                     struct bpf_prog *prog, struct sk_buff *skb,
+                    struct sock *migrating_sk,
                     u32 hash)
 {
        return NULL;
@@ -1464,17 +1467,19 @@ static inline bool bpf_sk_lookup_run_v6(struct net *net, int protocol,
 }
 #endif /* IS_ENABLED(CONFIG_IPV6) */
 
-static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex, u64 flags,
+static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifindex,
+                                                 u64 flags, const u64 flag_mask,
                                                  void *lookup_elem(struct bpf_map *map, u32 key))
 {
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+       const u64 action_mask = XDP_ABORTED | XDP_DROP | XDP_PASS | XDP_TX;
 
        /* Lower bits of the flags are used as return code on lookup failure */
-       if (unlikely(flags > XDP_TX))
+       if (unlikely(flags & ~(action_mask | flag_mask)))
                return XDP_ABORTED;
 
        ri->tgt_value = lookup_elem(map, ifindex);
-       if (unlikely(!ri->tgt_value)) {
+       if (unlikely(!ri->tgt_value) && !(flags & BPF_F_BROADCAST)) {
                /* If the lookup fails we want to clear out the state in the
                 * redirect_info struct completely, so that if an eBPF program
                 * performs multiple lookups, the last one always takes
@@ -1482,13 +1487,21 @@ static __always_inline int __bpf_xdp_redirect_map(struct bpf_map *map, u32 ifind
                 */
                ri->map_id = INT_MAX; /* Valid map id idr range: [1,INT_MAX[ */
                ri->map_type = BPF_MAP_TYPE_UNSPEC;
-               return flags;
+               return flags & action_mask;
        }
 
        ri->tgt_index = ifindex;
        ri->map_id = map->id;
        ri->map_type = map->map_type;
 
+       if (flags & BPF_F_BROADCAST) {
+               WRITE_ONCE(ri->map, map);
+               ri->flags = flags;
+       } else {
+               WRITE_ONCE(ri->map, NULL);
+               ri->flags = 0;
+       }
+
        return XDP_REDIRECT;
 }
 
index 746c80c..b862051 100644 (file)
@@ -126,6 +126,7 @@ struct netns_ipv4 {
        u8 sysctl_tcp_syn_retries;
        u8 sysctl_tcp_synack_retries;
        u8 sysctl_tcp_syncookies;
+       u8 sysctl_tcp_migrate_req;
        int sysctl_tcp_reordering;
        u8 sysctl_tcp_retries1;
        u8 sysctl_tcp_retries2;
index 505f1e1..473b0b0 100644 (file)
@@ -13,8 +13,9 @@ extern spinlock_t reuseport_lock;
 struct sock_reuseport {
        struct rcu_head         rcu;
 
-       u16                     max_socks;      /* length of socks */
-       u16                     num_socks;      /* elements in socks */
+       u16                     max_socks;              /* length of socks */
+       u16                     num_socks;              /* elements in socks */
+       u16                     num_closed_socks;       /* closed elements in socks */
        /* The last synq overflow event timestamp of this
         * reuse->socks[] group.
         */
@@ -31,10 +32,14 @@ extern int reuseport_alloc(struct sock *sk, bool bind_inany);
 extern int reuseport_add_sock(struct sock *sk, struct sock *sk2,
                              bool bind_inany);
 extern void reuseport_detach_sock(struct sock *sk);
+void reuseport_stop_listen_sock(struct sock *sk);
 extern struct sock *reuseport_select_sock(struct sock *sk,
                                          u32 hash,
                                          struct sk_buff *skb,
                                          int hdr_len);
+struct sock *reuseport_migrate_sock(struct sock *sk,
+                                   struct sock *migrating_sk,
+                                   struct sk_buff *skb);
 extern int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog);
 extern int reuseport_detach_prog(struct sock *sk);
 
index a5bc214..5533f0a 100644 (file)
@@ -170,6 +170,7 @@ struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
 struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
                                         struct net_device *dev);
 int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp);
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf);
 
 static inline
 void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
index fcad364..c40fc97 100644 (file)
@@ -110,7 +110,11 @@ DECLARE_EVENT_CLASS(xdp_redirect_template,
                u32 ifindex = 0, map_index = index;
 
                if (map_type == BPF_MAP_TYPE_DEVMAP || map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
-                       ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
+                       /* Just leave to_ifindex at 0 when doing a broadcast
+                        * redirect, as tgt will be NULL.
+                        */
+                       if (tgt)
+                               ifindex = ((struct _bpf_dtab_netdev *)tgt)->dev->ifindex;
                } else if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
                        ifindex = index;
                        map_index = 0;
index 418b9b8..bf9252c 100644 (file)
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
  *             Look up an element with the given *key* in the map referred to
  *             by the file descriptor *fd*, and if found, delete the element.
  *
+ *             For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ *             types, the *flags* argument needs to be set to 0, but for other
+ *             map types, it may be specified as:
+ *
+ *             **BPF_F_LOCK**
+ *                     Look up and delete the value of a spin-locked map
+ *                     without returning the lock. This must be specified if
+ *                     the elements contain a spinlock.
+ *
  *             The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
  *             implement this command as a "pop" operation, deleting the top
  *             element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
  *             This command is only valid for the following map types:
  *             * **BPF_MAP_TYPE_QUEUE**
  *             * **BPF_MAP_TYPE_STACK**
+ *             * **BPF_MAP_TYPE_HASH**
+ *             * **BPF_MAP_TYPE_PERCPU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
  *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
@@ -981,6 +994,8 @@ enum bpf_attach_type {
        BPF_SK_LOOKUP,
        BPF_XDP,
        BPF_SK_SKB_VERDICT,
+       BPF_SK_REUSEPORT_SELECT,
+       BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -2542,8 +2557,12 @@ union bpf_attr {
  *             The lower two bits of *flags* are used as the return code if
  *             the map lookup fails. This is so that the return value can be
  *             one of the XDP program return codes up to **XDP_TX**, as chosen
- *             by the caller. Any higher bits in the *flags* argument must be
- *             unset.
+ *             by the caller. The higher bits of *flags* can be set to
+ *             BPF_F_BROADCAST or BPF_F_EXCLUDE_INGRESS as defined below.
+ *
+ *             With BPF_F_BROADCAST the packet will be broadcast to all the
+ *             interfaces in the map, and with BPF_F_EXCLUDE_INGRESS the
+ *             ingress interface will be excluded when broadcasting.
  *
  *             See also **bpf_redirect**\ (), which only supports redirecting
  *             to an ifindex, but doesn't require a map to do so.
@@ -5109,6 +5128,12 @@ enum {
        BPF_F_BPRM_SECUREEXEC   = (1ULL << 0),
 };
 
+/* Flags for bpf_redirect_map helper */
+enum {
+       BPF_F_BROADCAST         = (1ULL << 3),
+       BPF_F_EXCLUDE_INGRESS   = (1ULL << 4),
+};
+
 #define __bpf_md_ptr(type, name)       \
 union {                                        \
        type name;                      \
@@ -5393,6 +5418,20 @@ struct sk_reuseport_md {
        __u32 ip_protocol;      /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
        __u32 bind_inany;       /* Is sock bound to an INANY address? */
        __u32 hash;             /* A hash of the packet 4 tuples */
+       /* When reuse->migrating_sk is NULL, it is selecting a sk for the
+        * new incoming connection request (e.g. selecting a listen sk for
+        * the received SYN in the TCP case).  reuse->sk is one of the sk
+        * in the reuseport group. The bpf prog can use reuse->sk to learn
+        * the local listening ip/port without looking into the skb.
+        *
+        * When reuse->migrating_sk is not NULL, reuse->sk is closed and
+        * reuse->migrating_sk is the socket that needs to be migrated
+        * to another listening socket.  migrating_sk could be a fullsock
+        * sk that is fully established or a reqsk that is in-the-middle
+        * of 3-way handshake.
+        */
+       __bpf_md_ptr(struct bpf_sock *, sk);
+       __bpf_md_ptr(struct bpf_sock *, migrating_sk);
 };
 
 #define BPF_TAG_SIZE   8
index 2921ca3..96ceed0 100644 (file)
@@ -72,7 +72,7 @@ void bpf_inode_storage_free(struct inode *inode)
                return;
        }
 
-       /* Netiher the bpf_prog nor the bpf-map's syscall
+       /* Neither the bpf_prog nor the bpf-map's syscall
         * could be modifying the local_storage->list now.
         * Thus, no elem can be added-to or deleted-from the
         * local_storage->list by the bpf_prog or by the bpf-map's syscall.
index da471bf..0606237 100644 (file)
@@ -127,7 +127,7 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 }
 
 /* The set of hooks which are called without pagefaults disabled and are allowed
- * to "sleep" and thus can be used for sleeable BPF programs.
+ * to "sleep" and thus can be used for sleepable BPF programs.
  */
 BTF_SET_START(sleepable_lsm_hooks)
 BTF_ID(func, bpf_lsm_bpf)
index 3925592..cb4b729 100644 (file)
@@ -51,7 +51,7 @@
  * The BTF type section contains a list of 'struct btf_type' objects.
  * Each one describes a C type.  Recall from the above section
  * that a 'struct btf_type' object could be immediately followed by extra
- * data in order to desribe some particular C types.
+ * data in order to describe some particular C types.
  *
  * type_id:
  * ~~~~~~~
@@ -1143,7 +1143,7 @@ static void *btf_show_obj_safe(struct btf_show *show,
 
        /*
         * We need a new copy to our safe object, either because we haven't
-        * yet copied and are intializing safe data, or because the data
+        * yet copied and are initializing safe data, or because the data
         * we want falls outside the boundaries of the safe object.
         */
        if (!safe) {
@@ -3417,7 +3417,7 @@ static struct btf_kind_operations func_proto_ops = {
         * BTF_KIND_FUNC_PROTO cannot be directly referred by
         * a struct's member.
         *
-        * It should be a funciton pointer instead.
+        * It should be a function pointer instead.
         * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO)
         *
         * Hence, there is no btf_func_check_member().
index 5e31ee9..034ad93 100644 (file)
@@ -1392,29 +1392,54 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn)
 select_insn:
        goto *jumptable[insn->code];
 
-       /* ALU */
-#define ALU(OPCODE, OP)                        \
-       ALU64_##OPCODE##_X:             \
-               DST = DST OP SRC;       \
-               CONT;                   \
-       ALU_##OPCODE##_X:               \
-               DST = (u32) DST OP (u32) SRC;   \
-               CONT;                   \
-       ALU64_##OPCODE##_K:             \
-               DST = DST OP IMM;               \
-               CONT;                   \
-       ALU_##OPCODE##_K:               \
-               DST = (u32) DST OP (u32) IMM;   \
+       /* Explicitly mask the register-based shift amounts with 63 or 31
+        * to avoid undefined behavior. Normally this won't affect the
+        * generated code, for example, in case of native 64 bit archs such
+        * as x86-64 or arm64, the compiler is optimizing the AND away for
+        * the interpreter. In case of JITs, each of the JIT backends compiles
+        * the BPF shift operations to machine instructions which produce
+        * implementation-defined results in such a case; the resulting
+        * contents of the register may be arbitrary, but program behaviour
+        * as a whole remains defined. In other words, in case of JIT backends,
+        * the AND must /not/ be added to the emitted LSH/RSH/ARSH translation.
+        */
+       /* ALU (shifts) */
+#define SHT(OPCODE, OP)                                        \
+       ALU64_##OPCODE##_X:                             \
+               DST = DST OP (SRC & 63);                \
+               CONT;                                   \
+       ALU_##OPCODE##_X:                               \
+               DST = (u32) DST OP ((u32) SRC & 31);    \
+               CONT;                                   \
+       ALU64_##OPCODE##_K:                             \
+               DST = DST OP IMM;                       \
+               CONT;                                   \
+       ALU_##OPCODE##_K:                               \
+               DST = (u32) DST OP (u32) IMM;           \
+               CONT;
+       /* ALU (rest) */
+#define ALU(OPCODE, OP)                                        \
+       ALU64_##OPCODE##_X:                             \
+               DST = DST OP SRC;                       \
+               CONT;                                   \
+       ALU_##OPCODE##_X:                               \
+               DST = (u32) DST OP (u32) SRC;           \
+               CONT;                                   \
+       ALU64_##OPCODE##_K:                             \
+               DST = DST OP IMM;                       \
+               CONT;                                   \
+       ALU_##OPCODE##_K:                               \
+               DST = (u32) DST OP (u32) IMM;           \
                CONT;
-
        ALU(ADD,  +)
        ALU(SUB,  -)
        ALU(AND,  &)
        ALU(OR,   |)
-       ALU(LSH, <<)
-       ALU(RSH, >>)
        ALU(XOR,  ^)
        ALU(MUL,  *)
+       SHT(LSH, <<)
+       SHT(RSH, >>)
+#undef SHT
 #undef ALU
        ALU_NEG:
                DST = (u32) -DST;
@@ -1439,13 +1464,13 @@ select_insn:
                insn++;
                CONT;
        ALU_ARSH_X:
-               DST = (u64) (u32) (((s32) DST) >> SRC);
+               DST = (u64) (u32) (((s32) DST) >> (SRC & 31));
                CONT;
        ALU_ARSH_K:
                DST = (u64) (u32) (((s32) DST) >> IMM);
                CONT;
        ALU64_ARSH_X:
-               (*(s64 *) &DST) >>= SRC;
+               (*(s64 *) &DST) >>= (SRC & 63);
                CONT;
        ALU64_ARSH_K:
                (*(s64 *) &DST) >>= IMM;
index 5dd3e86..a1a0c4e 100644 (file)
@@ -601,7 +601,8 @@ static int cpu_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
 
 static int cpu_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-       return __bpf_xdp_redirect_map(map, ifindex, flags, __cpu_map_lookup_elem);
+       return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+                                     __cpu_map_lookup_elem);
 }
 
 static int cpu_map_btf_id;
index aa51647..2a75e6c 100644 (file)
@@ -57,6 +57,7 @@ struct xdp_dev_bulk_queue {
        struct list_head flush_node;
        struct net_device *dev;
        struct net_device *dev_rx;
+       struct bpf_prog *xdp_prog;
        unsigned int count;
 };
 
@@ -197,6 +198,7 @@ static void dev_map_free(struct bpf_map *map)
        list_del_rcu(&dtab->list);
        spin_unlock(&dev_map_lock);
 
+       bpf_clear_redirect_map(map);
        synchronize_rcu();
 
        /* Make sure prior __dev_map_entry_free() have completed. */
@@ -326,22 +328,69 @@ bool dev_map_can_have_prog(struct bpf_map *map)
        return false;
 }
 
+static int dev_map_bpf_prog_run(struct bpf_prog *xdp_prog,
+                               struct xdp_frame **frames, int n,
+                               struct net_device *dev)
+{
+       struct xdp_txq_info txq = { .dev = dev };
+       struct xdp_buff xdp;
+       int i, nframes = 0;
+
+       for (i = 0; i < n; i++) {
+               struct xdp_frame *xdpf = frames[i];
+               u32 act;
+               int err;
+
+               xdp_convert_frame_to_buff(xdpf, &xdp);
+               xdp.txq = &txq;
+
+               act = bpf_prog_run_xdp(xdp_prog, &xdp);
+               switch (act) {
+               case XDP_PASS:
+                       err = xdp_update_frame_from_buff(&xdp, xdpf);
+                       if (unlikely(err < 0))
+                               xdp_return_frame_rx_napi(xdpf);
+                       else
+                               frames[nframes++] = xdpf;
+                       break;
+               default:
+                       bpf_warn_invalid_xdp_action(act);
+                       fallthrough;
+               case XDP_ABORTED:
+                       trace_xdp_exception(dev, xdp_prog, act);
+                       fallthrough;
+               case XDP_DROP:
+                       xdp_return_frame_rx_napi(xdpf);
+                       break;
+               }
+       }
+       return nframes; /* sent frames count */
+}
+
 static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
        struct net_device *dev = bq->dev;
+       unsigned int cnt = bq->count;
        int sent = 0, err = 0;
+       int to_send = cnt;
        int i;
 
-       if (unlikely(!bq->count))
+       if (unlikely(!cnt))
                return;
 
-       for (i = 0; i < bq->count; i++) {
+       for (i = 0; i < cnt; i++) {
                struct xdp_frame *xdpf = bq->q[i];
 
                prefetch(xdpf);
        }
 
-       sent = dev->netdev_ops->ndo_xdp_xmit(dev, bq->count, bq->q, flags);
+       if (bq->xdp_prog) {
+               to_send = dev_map_bpf_prog_run(bq->xdp_prog, bq->q, cnt, dev);
+               if (!to_send)
+                       goto out;
+       }
+
+       sent = dev->netdev_ops->ndo_xdp_xmit(dev, to_send, bq->q, flags);
        if (sent < 0) {
                /* If ndo_xdp_xmit fails with an errno, no frames have
                 * been xmit'ed.
@@ -353,13 +402,12 @@ static void bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
        /* If not all frames have been transmitted, it is our
         * responsibility to free them
         */
-       for (i = sent; unlikely(i < bq->count); i++)
+       for (i = sent; unlikely(i < to_send); i++)
                xdp_return_frame_rx_napi(bq->q[i]);
 
-       trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, bq->count - sent, err);
-       bq->dev_rx = NULL;
+out:
        bq->count = 0;
-       __list_del_clearprev(&bq->flush_node);
+       trace_xdp_devmap_xmit(bq->dev_rx, dev, sent, cnt - sent, err);
 }
 
 /* __dev_flush is called from xdp_do_flush() which _must_ be signaled
@@ -377,13 +425,17 @@ void __dev_flush(void)
        struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
        struct xdp_dev_bulk_queue *bq, *tmp;
 
-       list_for_each_entry_safe(bq, tmp, flush_list, flush_node)
+       list_for_each_entry_safe(bq, tmp, flush_list, flush_node) {
                bq_xmit_all(bq, XDP_XMIT_FLUSH);
+               bq->dev_rx = NULL;
+               bq->xdp_prog = NULL;
+               __list_del_clearprev(&bq->flush_node);
+       }
 }
 
 /* rcu_read_lock (from syscall and BPF contexts) ensures that if a delete and/or
- * update happens in parallel here a dev_put wont happen until after reading the
- * ifindex.
+ * update happens in parallel here a dev_put won't happen until after reading
+ * the ifindex.
  */
 static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
 {
@@ -401,7 +453,7 @@ static void *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
  * Thus, safe percpu variable access.
  */
 static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
-                      struct net_device *dev_rx)
+                      struct net_device *dev_rx, struct bpf_prog *xdp_prog)
 {
        struct list_head *flush_list = this_cpu_ptr(&dev_flush_list);
        struct xdp_dev_bulk_queue *bq = this_cpu_ptr(dev->xdp_bulkq);
@@ -412,18 +464,22 @@ static void bq_enqueue(struct net_device *dev, struct xdp_frame *xdpf,
        /* Ingress dev_rx will be the same for all xdp_frame's in
         * bulk_queue, because bq stored per-CPU and must be flushed
         * from net_device drivers NAPI func end.
+        *
+        * Do the same with xdp_prog and flush_list since these fields
+        * are only ever modified together.
         */
-       if (!bq->dev_rx)
+       if (!bq->dev_rx) {
                bq->dev_rx = dev_rx;
+               bq->xdp_prog = xdp_prog;
+               list_add(&bq->flush_node, flush_list);
+       }
 
        bq->q[bq->count++] = xdpf;
-
-       if (!bq->flush_node.prev)
-               list_add(&bq->flush_node, flush_list);
 }
 
 static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
-                              struct net_device *dev_rx)
+                               struct net_device *dev_rx,
+                               struct bpf_prog *xdp_prog)
 {
        struct xdp_frame *xdpf;
        int err;
@@ -439,55 +495,115 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
        if (unlikely(!xdpf))
                return -EOVERFLOW;
 
-       bq_enqueue(dev, xdpf, dev_rx);
+       bq_enqueue(dev, xdpf, dev_rx, xdp_prog);
        return 0;
 }
 
-static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
-                                        struct xdp_buff *xdp,
-                                        struct bpf_prog *xdp_prog)
+int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
+                   struct net_device *dev_rx)
 {
-       struct xdp_txq_info txq = { .dev = dev };
-       u32 act;
+       return __xdp_enqueue(dev, xdp, dev_rx, NULL);
+}
 
-       xdp_set_data_meta_invalid(xdp);
-       xdp->txq = &txq;
+int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
+                   struct net_device *dev_rx)
+{
+       struct net_device *dev = dst->dev;
 
-       act = bpf_prog_run_xdp(xdp_prog, xdp);
-       switch (act) {
-       case XDP_PASS:
-               return xdp;
-       case XDP_DROP:
-               break;
-       default:
-               bpf_warn_invalid_xdp_action(act);
-               fallthrough;
-       case XDP_ABORTED:
-               trace_xdp_exception(dev, xdp_prog, act);
-               break;
-       }
+       return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog);
+}
 
-       xdp_return_buff(xdp);
-       return NULL;
+static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp,
+                        int exclude_ifindex)
+{
+       if (!obj || obj->dev->ifindex == exclude_ifindex ||
+           !obj->dev->netdev_ops->ndo_xdp_xmit)
+               return false;
+
+       if (xdp_ok_fwd_dev(obj->dev, xdp->data_end - xdp->data))
+               return false;
+
+       return true;
 }
 
-int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
-                   struct net_device *dev_rx)
+static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj,
+                                struct net_device *dev_rx,
+                                struct xdp_frame *xdpf)
 {
-       return __xdp_enqueue(dev, xdp, dev_rx);
+       struct xdp_frame *nxdpf;
+
+       nxdpf = xdpf_clone(xdpf);
+       if (!nxdpf)
+               return -ENOMEM;
+
+       bq_enqueue(obj->dev, nxdpf, dev_rx, obj->xdp_prog);
+
+       return 0;
 }
 
-int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
-                   struct net_device *dev_rx)
+int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx,
+                         struct bpf_map *map, bool exclude_ingress)
 {
-       struct net_device *dev = dst->dev;
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       int exclude_ifindex = exclude_ingress ? dev_rx->ifindex : 0;
+       struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       struct hlist_head *head;
+       struct xdp_frame *xdpf;
+       unsigned int i;
+       int err;
 
-       if (dst->xdp_prog) {
-               xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
-               if (!xdp)
-                       return 0;
+       xdpf = xdp_convert_buff_to_frame(xdp);
+       if (unlikely(!xdpf))
+               return -EOVERFLOW;
+
+       if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+               for (i = 0; i < map->max_entries; i++) {
+                       dst = READ_ONCE(dtab->netdev_map[i]);
+                       if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                               continue;
+
+                       /* we only need n-1 clones; last_dst enqueued below */
+                       if (!last_dst) {
+                               last_dst = dst;
+                               continue;
+                       }
+
+                       err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
+                       if (err)
+                               return err;
+
+                       last_dst = dst;
+               }
+       } else { /* BPF_MAP_TYPE_DEVMAP_HASH */
+               for (i = 0; i < dtab->n_buckets; i++) {
+                       head = dev_map_index_hash(dtab, i);
+                       hlist_for_each_entry_rcu(dst, head, index_hlist,
+                                                lockdep_is_held(&dtab->index_lock)) {
+                               if (!is_valid_dst(dst, xdp, exclude_ifindex))
+                                       continue;
+
+                               /* we only need n-1 clones; last_dst enqueued below */
+                               if (!last_dst) {
+                                       last_dst = dst;
+                                       continue;
+                               }
+
+                               err = dev_map_enqueue_clone(last_dst, dev_rx, xdpf);
+                               if (err)
+                                       return err;
+
+                               last_dst = dst;
+                       }
+               }
        }
-       return __xdp_enqueue(dev, xdp, dev_rx);
+
+       /* consume the last copy of the frame */
+       if (last_dst)
+               bq_enqueue(last_dst->dev, xdpf, dev_rx, last_dst->xdp_prog);
+       else
+               xdp_return_frame_rx_napi(xdpf); /* dtab is empty */
+
+       return 0;
 }
 
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
@@ -504,6 +620,87 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
        return 0;
 }
 
+static int dev_map_redirect_clone(struct bpf_dtab_netdev *dst,
+                                 struct sk_buff *skb,
+                                 struct bpf_prog *xdp_prog)
+{
+       struct sk_buff *nskb;
+       int err;
+
+       nskb = skb_clone(skb, GFP_ATOMIC);
+       if (!nskb)
+               return -ENOMEM;
+
+       err = dev_map_generic_redirect(dst, nskb, xdp_prog);
+       if (unlikely(err)) {
+               consume_skb(nskb);
+               return err;
+       }
+
+       return 0;
+}
+
+int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb,
+                          struct bpf_prog *xdp_prog, struct bpf_map *map,
+                          bool exclude_ingress)
+{
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       int exclude_ifindex = exclude_ingress ? dev->ifindex : 0;
+       struct bpf_dtab_netdev *dst, *last_dst = NULL;
+       struct hlist_head *head;
+       struct hlist_node *next;
+       unsigned int i;
+       int err;
+
+       if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
+               for (i = 0; i < map->max_entries; i++) {
+                       dst = READ_ONCE(dtab->netdev_map[i]);
+                       if (!dst || dst->dev->ifindex == exclude_ifindex)
+                               continue;
+
+                       /* we only need n-1 clones; last_dst enqueued below */
+                       if (!last_dst) {
+                               last_dst = dst;
+                               continue;
+                       }
+
+                       err = dev_map_redirect_clone(last_dst, skb, xdp_prog);
+                       if (err)
+                               return err;
+
+                       last_dst = dst;
+               }
+       } else { /* BPF_MAP_TYPE_DEVMAP_HASH */
+               for (i = 0; i < dtab->n_buckets; i++) {
+                       head = dev_map_index_hash(dtab, i);
+                       hlist_for_each_entry_safe(dst, next, head, index_hlist) {
+                               if (!dst || dst->dev->ifindex == exclude_ifindex)
+                                       continue;
+
+                               /* we only need n-1 clones; last_dst enqueued below */
+                               if (!last_dst) {
+                                       last_dst = dst;
+                                       continue;
+                               }
+
+                               err = dev_map_redirect_clone(last_dst, skb, xdp_prog);
+                               if (err)
+                                       return err;
+
+                               last_dst = dst;
+                       }
+               }
+       }
+
+       /* consume the first skb and return */
+       if (last_dst)
+               return dev_map_generic_redirect(last_dst, skb, xdp_prog);
+
+       /* dtab is empty */
+       consume_skb(skb);
+       return 0;
+}
+
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
        struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
@@ -730,12 +927,16 @@ static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value,
 
 static int dev_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-       return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_lookup_elem);
+       return __bpf_xdp_redirect_map(map, ifindex, flags,
+                                     BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
+                                     __dev_map_lookup_elem);
 }
 
 static int dev_hash_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-       return __bpf_xdp_redirect_map(map, ifindex, flags, __dev_map_hash_lookup_elem);
+       return __bpf_xdp_redirect_map(map, ifindex, flags,
+                                     BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS,
+                                     __dev_map_hash_lookup_elem);
 }
 
 static int dev_map_btf_id;
index d7ebb12..6f6681b 100644 (file)
  * events, kprobes and tracing to be invoked before the prior invocation
  * from one of these contexts completed. sys_bpf() uses the same mechanism
  * by pinning the task to the current CPU and incrementing the recursion
- * protection accross the map operation.
+ * protection across the map operation.
  *
  * This has subtle implications on PREEMPT_RT. PREEMPT_RT forbids certain
  * operations like memory allocations (even with GFP_ATOMIC) from atomic
  * contexts. This is required because even with GFP_ATOMIC the memory
- * allocator calls into code pathes which acquire locks with long held lock
+ * allocator calls into code paths which acquire locks with long held lock
  * sections. To ensure the deterministic behaviour these locks are regular
  * spinlocks, which are converted to 'sleepable' spinlocks on RT. The only
  * true atomic contexts on an RT kernel are the low level hardware
@@ -1401,6 +1401,100 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
        rcu_read_unlock();
 }
 
+static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+                                            void *value, bool is_lru_map,
+                                            bool is_percpu, u64 flags)
+{
+       struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+       struct hlist_nulls_head *head;
+       unsigned long bflags;
+       struct htab_elem *l;
+       u32 hash, key_size;
+       struct bucket *b;
+       int ret;
+
+       key_size = map->key_size;
+
+       hash = htab_map_hash(key, key_size, htab->hashrnd);
+       b = __select_bucket(htab, hash);
+       head = &b->head;
+
+       ret = htab_lock_bucket(htab, b, hash, &bflags);
+       if (ret)
+               return ret;
+
+       l = lookup_elem_raw(head, hash, key, key_size);
+       if (!l) {
+               ret = -ENOENT;
+       } else {
+               if (is_percpu) {
+                       u32 roundup_value_size = round_up(map->value_size, 8);
+                       void __percpu *pptr;
+                       int off = 0, cpu;
+
+                       pptr = htab_elem_get_ptr(l, key_size);
+                       for_each_possible_cpu(cpu) {
+                               bpf_long_memcpy(value + off,
+                                               per_cpu_ptr(pptr, cpu),
+                                               roundup_value_size);
+                               off += roundup_value_size;
+                       }
+               } else {
+                       u32 roundup_key_size = round_up(map->key_size, 8);
+
+                       if (flags & BPF_F_LOCK)
+                               copy_map_value_locked(map, value, l->key +
+                                                     roundup_key_size,
+                                                     true);
+                       else
+                               copy_map_value(map, value, l->key +
+                                              roundup_key_size);
+                       check_and_init_map_lock(map, value);
+               }
+
+               hlist_nulls_del_rcu(&l->hash_node);
+               if (!is_lru_map)
+                       free_htab_elem(htab, l);
+       }
+
+       htab_unlock_bucket(htab, b, hash, bflags);
+
+       if (is_lru_map && l)
+               bpf_lru_push_free(&htab->lru, &l->lru_node);
+
+       return ret;
+}
+
+static int htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+                                          void *value, u64 flags)
+{
+       return __htab_map_lookup_and_delete_elem(map, key, value, false, false,
+                                                flags);
+}
+
+static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+                                                 void *key, void *value,
+                                                 u64 flags)
+{
+       return __htab_map_lookup_and_delete_elem(map, key, value, false, true,
+                                                flags);
+}
+
+static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+                                              void *value, u64 flags)
+{
+       return __htab_map_lookup_and_delete_elem(map, key, value, true, false,
+                                                flags);
+}
+
+static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+                                                     void *key, void *value,
+                                                     u64 flags)
+{
+       return __htab_map_lookup_and_delete_elem(map, key, value, true, true,
+                                                flags);
+}
+
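User space reaches these wrappers through the extended BPF_MAP_LOOKUP_AND_DELETE_ELEM command. A hedged sketch of the intended usage for a plain BPF_MAP_TYPE_HASH with __u64 values, assuming the bpf_map_lookup_and_delete_elem_flags() wrapper this series adds to libbpf:

    #include <bpf/bpf.h>
    #include <bpf/libbpf.h>

    /* Atomically read and remove one element. For the PERCPU variants the
     * value buffer must instead hold round_up(value_size, 8) *
     * libbpf_num_possible_cpus() bytes, matching the per-CPU copy loop in
     * __htab_map_lookup_and_delete_elem() above.
     */
    static int pop_one(int map_fd, __u32 key, __u64 *value)
    {
            /* Pass BPF_F_LOCK instead of 0 only when the map value embeds
             * a struct bpf_spin_lock; the syscall rejects it otherwise.
             */
            return bpf_map_lookup_and_delete_elem_flags(map_fd, &key, value, 0);
    }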
 static int
 __htab_map_lookup_and_delete_batch(struct bpf_map *map,
                                   const union bpf_attr *attr,
@@ -1934,6 +2028,7 @@ const struct bpf_map_ops htab_map_ops = {
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_map_lookup_elem,
+       .map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
        .map_update_elem = htab_map_update_elem,
        .map_delete_elem = htab_map_delete_elem,
        .map_gen_lookup = htab_map_gen_lookup,
@@ -1954,6 +2049,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_lru_map_lookup_elem,
+       .map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
        .map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
        .map_update_elem = htab_lru_map_update_elem,
        .map_delete_elem = htab_lru_map_delete_elem,
@@ -2077,6 +2173,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_percpu_map_lookup_elem,
+       .map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
        .map_update_elem = htab_percpu_map_update_elem,
        .map_delete_elem = htab_map_delete_elem,
        .map_seq_show_elem = htab_percpu_map_seq_show_elem,
@@ -2096,6 +2193,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
        .map_free = htab_map_free,
        .map_get_next_key = htab_map_get_next_key,
        .map_lookup_elem = htab_lru_percpu_map_lookup_elem,
+       .map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
        .map_update_elem = htab_lru_percpu_map_update_elem,
        .map_delete_elem = htab_lru_map_delete_elem,
        .map_seq_show_elem = htab_percpu_map_seq_show_elem,
index 52aa7b3..03af863 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
 #pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)
index 4838922..93a5539 100644 (file)
@@ -102,7 +102,7 @@ static void reuseport_array_free(struct bpf_map *map)
        /*
         * ops->map_*_elem() will not be able to access this
         * array now. Hence, this function only races with
-        * bpf_sk_reuseport_detach() which was triggerred by
+        * bpf_sk_reuseport_detach() which was triggered by
         * close() or disconnect().
         *
         * This function and bpf_sk_reuseport_detach() are
index 73d15bc..e343f15 100644 (file)
@@ -1484,7 +1484,7 @@ free_buf:
        return err;
 }
 
-#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD value
+#define BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD flags
 
 static int map_lookup_and_delete_elem(union bpf_attr *attr)
 {
@@ -1500,6 +1500,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
        if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM))
                return -EINVAL;
 
+       if (attr->flags & ~BPF_F_LOCK)
+               return -EINVAL;
+
        f = fdget(ufd);
        map = __bpf_map_get(f);
        if (IS_ERR(map))
@@ -1510,24 +1513,47 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
                goto err_put;
        }
 
+       if (attr->flags &&
+           (map->map_type == BPF_MAP_TYPE_QUEUE ||
+            map->map_type == BPF_MAP_TYPE_STACK)) {
+               err = -EINVAL;
+               goto err_put;
+       }
+
+       if ((attr->flags & BPF_F_LOCK) &&
+           !map_value_has_spin_lock(map)) {
+               err = -EINVAL;
+               goto err_put;
+       }
+
        key = __bpf_copy_key(ukey, map->key_size);
        if (IS_ERR(key)) {
                err = PTR_ERR(key);
                goto err_put;
        }
 
-       value_size = map->value_size;
+       value_size = bpf_map_value_size(map);
 
        err = -ENOMEM;
        value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
        if (!value)
                goto free_key;
 
+       err = -ENOTSUPP;
        if (map->map_type == BPF_MAP_TYPE_QUEUE ||
            map->map_type == BPF_MAP_TYPE_STACK) {
                err = map->ops->map_pop_elem(map, value);
-       } else {
-               err = -ENOTSUPP;
+       } else if (map->map_type == BPF_MAP_TYPE_HASH ||
+                  map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+                  map->map_type == BPF_MAP_TYPE_LRU_HASH ||
+                  map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+               if (!bpf_map_is_dev_bound(map)) {
+                       bpf_disable_instrumentation();
+                       rcu_read_lock();
+                       err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
+                       rcu_read_unlock();
+                       bpf_enable_instrumentation();
+               }
        }
 
        if (err)
@@ -1947,6 +1973,11 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr)
                        attr->expected_attach_type =
                                BPF_CGROUP_INET_SOCK_CREATE;
                break;
+       case BPF_PROG_TYPE_SK_REUSEPORT:
+               if (!attr->expected_attach_type)
+                       attr->expected_attach_type =
+                               BPF_SK_REUSEPORT_SELECT;
+               break;
        }
 }
 
@@ -2030,6 +2061,14 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
                if (expected_attach_type == BPF_SK_LOOKUP)
                        return 0;
                return -EINVAL;
+       case BPF_PROG_TYPE_SK_REUSEPORT:
+               switch (expected_attach_type) {
+               case BPF_SK_REUSEPORT_SELECT:
+               case BPF_SK_REUSEPORT_SELECT_OR_MIGRATE:
+                       return 0;
+               default:
+                       return -EINVAL;
+               }
        case BPF_PROG_TYPE_SYSCALL:
        case BPF_PROG_TYPE_EXT:
                if (expected_attach_type)
index ceac528..3d7127f 100644 (file)
@@ -111,28 +111,31 @@ struct tnum tnum_xor(struct tnum a, struct tnum b)
        return TNUM(v & ~mu, mu);
 }
 
-/* half-multiply add: acc += (unknown * mask * value).
- * An intermediate step in the multiply algorithm.
+/* Generate partial products by multiplying each bit in the multiplier (tnum a)
+ * with the multiplicand (tnum b), and add the partial products after
+ * appropriately bit-shifting them. Instead of directly performing tnum addition
+ * on the generated partial products, equivalently, decompose each partial
+ * product into two tnums, consisting of the value-sum (acc_v) and the
+ * mask-sum (acc_m) and then perform tnum addition on them. The following paper
+ * explains the algorithm in more detail: https://arxiv.org/abs/2105.05398.
  */
-static struct tnum hma(struct tnum acc, u64 value, u64 mask)
-{
-       while (mask) {
-               if (mask & 1)
-                       acc = tnum_add(acc, TNUM(0, value));
-               mask >>= 1;
-               value <<= 1;
-       }
-       return acc;
-}
-
 struct tnum tnum_mul(struct tnum a, struct tnum b)
 {
-       struct tnum acc;
-       u64 pi;
-
-       pi = a.value * b.value;
-       acc = hma(TNUM(pi, 0), a.mask, b.mask | b.value);
-       return hma(acc, b.mask, a.value);
+       u64 acc_v = a.value * b.value;
+       struct tnum acc_m = TNUM(0, 0);
+
+       while (a.value || a.mask) {
+               /* LSB of tnum a is a certain 1 */
+               if (a.value & 1)
+                       acc_m = tnum_add(acc_m, TNUM(0, b.mask));
+               /* LSB of tnum a is uncertain */
+               else if (a.mask & 1)
+                       acc_m = tnum_add(acc_m, TNUM(0, b.value | b.mask));
+               /* Note: no case for LSB is certain 0 */
+               a = tnum_rshift(a, 1);
+               b = tnum_lshift(b, 1);
+       }
+       return tnum_add(TNUM(acc_v, 0), acc_m);
 }
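A short worked trace of the new algorithm (editorial illustration, not part of the patch): let a = TNUM(2, 1), i.e. the set {2, 3}, and b = TNUM(4, 0), i.e. {4}.

    acc_v = a.value * b.value = 8,  acc_m = TNUM(0, 0)
    iter 1: LSB of a uncertain -> acc_m += TNUM(0, b.value | b.mask) = TNUM(0, 4)
            a = TNUM(1, 0), b = TNUM(8, 0)
    iter 2: LSB of a certain 1 -> acc_m += TNUM(0, b.mask) = TNUM(0, 4)
            a = TNUM(0, 0), b = TNUM(16, 0)
    result: tnum_add(TNUM(8, 0), TNUM(0, 4)) = TNUM(8, 4), i.e. {8, 12}

which is exactly {2, 3} * {4}; per the paper referenced in the comment, the decomposition is provably sound and never less precise than the hma()-based multiply it replaces.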
 
 /* Note that if a and b disagree - i.e. one has a 'known 1' where the other has
index 2d44b5a..28a3630 100644 (file)
@@ -552,7 +552,7 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
  * __bpf_prog_enter returns:
  * 0 - skip execution of the bpf prog
  * 1 - execute bpf prog
- * [2..MAX_U64] - excute bpf prog and record execution time.
+ * [2..MAX_U64] - execute bpf prog and record execution time.
  *     This is start time.
  */
 u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
index 331b170..b7d51fc 100644 (file)
@@ -47,7 +47,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  * - unreachable insns exist (shouldn't be a forest. program = one function)
  * - out of bounds or malformed jumps
  * The second pass is all possible path descent from the 1st insn.
- * Since it's analyzing all pathes through the program, the length of the
+ * Since it's analyzing all paths through the program, the length of the
  * analysis is limited to 64k insn, which may be hit even if total number of
  * insn is less then 4K, but there are too many branches that change stack/regs.
  * Number of 'branches to be analyzed' is limited to 1k
@@ -132,7 +132,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  * If it's ok, then verifier allows this BPF_CALL insn and looks at
  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
- * returns ether pointer to map value or NULL.
+ * returns either pointer to map value or NULL.
  *
  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
  * insn, the register holding that pointer in the true branch changes state to
@@ -2616,7 +2616,7 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
                if (dst_reg != BPF_REG_FP) {
                        /* The backtracking logic can only recognize explicit
                         * stack slot address like [fp - 8]. Other spill of
-                        * scalar via different register has to be conervative.
+                        * scalar via different register has to be conservative.
                         * Backtrack from here and mark all registers as precise
                         * that contributed into 'reg' being a constant.
                         */
@@ -9059,7 +9059,7 @@ static int check_return_code(struct bpf_verifier_env *env)
            !prog->aux->attach_func_proto->type)
                return 0;
 
-       /* eBPF calling convetion is such that R0 is used
+       /* eBPF calling convention is such that R0 is used
         * to return the value from eBPF program.
         * Make sure that it's readable at this time
         * of bpf_exit, which means that program wrote
@@ -9850,7 +9850,7 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
  * Since the verifier pushes the branch states as it sees them while exploring
  * the program the condition of walking the branch instruction for the second
  * time means that all states below this branch were already explored and
- * their final liveness markes are already propagated.
+ * their final liveness marks are already propagated.
  * Hence when the verifier completes the search of state list in is_state_visited()
  * we can call this clean_live_states() function to mark all liveness states
  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
@@ -12470,7 +12470,7 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
                        prog->aux->max_pkt_offset = MAX_PACKET_OFF;
 
                        /* mark bpf_tail_call as different opcode to avoid
-                        * conditional branch in the interpeter for every normal
+                        * conditional branch in the interpreter for every normal
                         * call and to prevent accidental JITing by JIT compiler
                         * that doesn't support bpf_tail_call yet
                         */
index 239de13..0b13d81 100644 (file)
@@ -3931,6 +3931,23 @@ void xdp_do_flush(void)
 }
 EXPORT_SYMBOL_GPL(xdp_do_flush);
 
+void bpf_clear_redirect_map(struct bpf_map *map)
+{
+       struct bpf_redirect_info *ri;
+       int cpu;
+
+       for_each_possible_cpu(cpu) {
+               ri = per_cpu_ptr(&bpf_redirect_info, cpu);
+               /* Avoid polluting remote cacheline due to writes if
+                * not needed. Once we pass this test, we need the
+                * cmpxchg() to make sure it hasn't been changed in
+                * the meantime by remote CPU.
+                */
+               if (unlikely(READ_ONCE(ri->map) == map))
+                       cmpxchg(&ri->map, map, NULL);
+       }
+}
+
 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
                    struct bpf_prog *xdp_prog)
 {
@@ -3938,6 +3955,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
        enum bpf_map_type map_type = ri->map_type;
        void *fwd = ri->tgt_value;
        u32 map_id = ri->map_id;
+       struct bpf_map *map;
        int err;
 
        ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
@@ -3947,7 +3965,14 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
        case BPF_MAP_TYPE_DEVMAP:
                fallthrough;
        case BPF_MAP_TYPE_DEVMAP_HASH:
-               err = dev_map_enqueue(fwd, xdp, dev);
+               map = READ_ONCE(ri->map);
+               if (unlikely(map)) {
+                       WRITE_ONCE(ri->map, NULL);
+                       err = dev_map_enqueue_multi(xdp, dev, map,
+                                                   ri->flags & BPF_F_EXCLUDE_INGRESS);
+               } else {
+                       err = dev_map_enqueue(fwd, xdp, dev);
+               }
                break;
        case BPF_MAP_TYPE_CPUMAP:
                err = cpu_map_enqueue(fwd, xdp, dev);
@@ -3989,13 +4014,21 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
                                       enum bpf_map_type map_type, u32 map_id)
 {
        struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+       struct bpf_map *map;
        int err;
 
        switch (map_type) {
        case BPF_MAP_TYPE_DEVMAP:
                fallthrough;
        case BPF_MAP_TYPE_DEVMAP_HASH:
-               err = dev_map_generic_redirect(fwd, skb, xdp_prog);
+               map = READ_ONCE(ri->map);
+               if (unlikely(map)) {
+                       WRITE_ONCE(ri->map, NULL);
+                       err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
+                                                    ri->flags & BPF_F_EXCLUDE_INGRESS);
+               } else {
+                       err = dev_map_generic_redirect(fwd, skb, xdp_prog);
+               }
                if (unlikely(err))
                        goto err;
                break;
@@ -10012,11 +10045,13 @@ out:
 static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
                                    struct sock_reuseport *reuse,
                                    struct sock *sk, struct sk_buff *skb,
+                                   struct sock *migrating_sk,
                                    u32 hash)
 {
        reuse_kern->skb = skb;
        reuse_kern->sk = sk;
        reuse_kern->selected_sk = NULL;
+       reuse_kern->migrating_sk = migrating_sk;
        reuse_kern->data_end = skb->data + skb_headlen(skb);
        reuse_kern->hash = hash;
        reuse_kern->reuseport_id = reuse->reuseport_id;
@@ -10025,12 +10060,13 @@ static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
 
 struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
                                  struct bpf_prog *prog, struct sk_buff *skb,
+                                 struct sock *migrating_sk,
                                  u32 hash)
 {
        struct sk_reuseport_kern reuse_kern;
        enum sk_action action;
 
-       bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
+       bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
        action = BPF_PROG_RUN(prog, &reuse_kern);
 
        if (action == SK_PASS)
@@ -10140,6 +10176,8 @@ sk_reuseport_func_proto(enum bpf_func_id func_id,
                return &sk_reuseport_load_bytes_proto;
        case BPF_FUNC_skb_load_bytes_relative:
                return &sk_reuseport_load_bytes_relative_proto;
+       case BPF_FUNC_get_socket_cookie:
+               return &bpf_get_socket_ptr_cookie_proto;
        default:
                return bpf_base_func_proto(func_id);
        }
@@ -10169,6 +10207,14 @@ sk_reuseport_is_valid_access(int off, int size,
        case offsetof(struct sk_reuseport_md, hash):
                return size == size_default;
 
+       case offsetof(struct sk_reuseport_md, sk):
+               info->reg_type = PTR_TO_SOCKET;
+               return size == sizeof(__u64);
+
+       case offsetof(struct sk_reuseport_md, migrating_sk):
+               info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
+               return size == sizeof(__u64);
+
        /* Fields that allow narrowing */
        case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
                if (size < sizeof_field(struct sk_buff, protocol))
@@ -10241,6 +10287,14 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
        case offsetof(struct sk_reuseport_md, bind_inany):
                SK_REUSEPORT_LOAD_FIELD(bind_inany);
                break;
+
+       case offsetof(struct sk_reuseport_md, sk):
+               SK_REUSEPORT_LOAD_FIELD(sk);
+               break;
+
+       case offsetof(struct sk_reuseport_md, migrating_sk):
+               SK_REUSEPORT_LOAD_FIELD(migrating_sk);
+               break;
        }
 
        return insn - insn_buf;
index b065f0a..de5ee3a 100644 (file)
 DEFINE_SPINLOCK(reuseport_lock);
 
 static DEFINE_IDA(reuseport_ida);
+static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
+                              struct sock_reuseport *reuse, bool bind_inany);
+
+static int reuseport_sock_index(struct sock *sk,
+                               const struct sock_reuseport *reuse,
+                               bool closed)
+{
+       int left, right;
+
+       if (!closed) {
+               left = 0;
+               right = reuse->num_socks;
+       } else {
+               left = reuse->max_socks - reuse->num_closed_socks;
+               right = reuse->max_socks;
+       }
+
+       for (; left < right; left++)
+               if (reuse->socks[left] == sk)
+                       return left;
+       return -1;
+}
+
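The two index ranges above reflect the socks[] layout this series introduces: listening sockets fill the array from the front, closed-but-migratable sockets are parked at the tail, and both sections grow toward the unused middle (rough editorial sketch):

    socks[0 .. num_socks-1]                            listening section
    socks[max_socks-num_closed_socks .. max_socks-1]   closed section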
+static void __reuseport_add_sock(struct sock *sk,
+                                struct sock_reuseport *reuse)
+{
+       reuse->socks[reuse->num_socks] = sk;
+       /* paired with smp_rmb() in reuseport_(select|migrate)_sock() */
+       smp_wmb();
+       reuse->num_socks++;
+}
+
+static bool __reuseport_detach_sock(struct sock *sk,
+                                   struct sock_reuseport *reuse)
+{
+       int i = reuseport_sock_index(sk, reuse, false);
+
+       if (i == -1)
+               return false;
+
+       reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
+       reuse->num_socks--;
+
+       return true;
+}
+
+static void __reuseport_add_closed_sock(struct sock *sk,
+                                       struct sock_reuseport *reuse)
+{
+       reuse->socks[reuse->max_socks - reuse->num_closed_socks - 1] = sk;
+       /* paired with READ_ONCE() in inet_csk_bind_conflict() */
+       WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks + 1);
+}
+
+static bool __reuseport_detach_closed_sock(struct sock *sk,
+                                          struct sock_reuseport *reuse)
+{
+       int i = reuseport_sock_index(sk, reuse, true);
+
+       if (i == -1)
+               return false;
+
+       reuse->socks[i] = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
+       /* paired with READ_ONCE() in inet_csk_bind_conflict() */
+       WRITE_ONCE(reuse->num_closed_socks, reuse->num_closed_socks - 1);
+
+       return true;
+}
 
 static struct sock_reuseport *__reuseport_alloc(unsigned int max_socks)
 {
@@ -49,6 +117,12 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
        reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock));
        if (reuse) {
+               if (reuse->num_closed_socks) {
+                       /* sk was shutdown()ed before */
+                       ret = reuseport_resurrect(sk, reuse, NULL, bind_inany);
+                       goto out;
+               }
+
                /* Only set reuse->bind_inany if the bind_inany is true.
                 * Otherwise, it will overwrite the reuse->bind_inany
                 * which was set by the bind/hash path.
@@ -72,9 +146,9 @@ int reuseport_alloc(struct sock *sk, bool bind_inany)
        }
 
        reuse->reuseport_id = id;
+       reuse->bind_inany = bind_inany;
        reuse->socks[0] = sk;
        reuse->num_socks = 1;
-       reuse->bind_inany = bind_inany;
        rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
 out:
@@ -90,14 +164,30 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
        u32 more_socks_size, i;
 
        more_socks_size = reuse->max_socks * 2U;
-       if (more_socks_size > U16_MAX)
+       if (more_socks_size > U16_MAX) {
+               if (reuse->num_closed_socks) {
+                       /* Make room by removing a closed sk.
+                        * The child has already been migrated.
+                        * Only reqsk left at this point.
+                        */
+                       struct sock *sk;
+
+                       sk = reuse->socks[reuse->max_socks - reuse->num_closed_socks];
+                       RCU_INIT_POINTER(sk->sk_reuseport_cb, NULL);
+                       __reuseport_detach_closed_sock(sk, reuse);
+
+                       return reuse;
+               }
+
                return NULL;
+       }
 
        more_reuse = __reuseport_alloc(more_socks_size);
        if (!more_reuse)
                return NULL;
 
        more_reuse->num_socks = reuse->num_socks;
+       more_reuse->num_closed_socks = reuse->num_closed_socks;
        more_reuse->prog = reuse->prog;
        more_reuse->reuseport_id = reuse->reuseport_id;
        more_reuse->bind_inany = reuse->bind_inany;
@@ -105,9 +195,13 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse)
 
        memcpy(more_reuse->socks, reuse->socks,
               reuse->num_socks * sizeof(struct sock *));
+       memcpy(more_reuse->socks +
+              (more_reuse->max_socks - more_reuse->num_closed_socks),
+              reuse->socks + (reuse->max_socks - reuse->num_closed_socks),
+              reuse->num_closed_socks * sizeof(struct sock *));
        more_reuse->synq_overflow_ts = READ_ONCE(reuse->synq_overflow_ts);
 
-       for (i = 0; i < reuse->num_socks; ++i)
+       for (i = 0; i < reuse->max_socks; ++i)
                rcu_assign_pointer(reuse->socks[i]->sk_reuseport_cb,
                                   more_reuse);
 
@@ -152,13 +246,21 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
        reuse = rcu_dereference_protected(sk2->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock));
        old_reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
-                                            lockdep_is_held(&reuseport_lock));
+                                             lockdep_is_held(&reuseport_lock));
+       if (old_reuse && old_reuse->num_closed_socks) {
+               /* sk was shutdown()ed before */
+               int err = reuseport_resurrect(sk, old_reuse, reuse, reuse->bind_inany);
+
+               spin_unlock_bh(&reuseport_lock);
+               return err;
+       }
+
        if (old_reuse && old_reuse->num_socks != 1) {
                spin_unlock_bh(&reuseport_lock);
                return -EBUSY;
        }
 
-       if (reuse->num_socks == reuse->max_socks) {
+       if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
                reuse = reuseport_grow(reuse);
                if (!reuse) {
                        spin_unlock_bh(&reuseport_lock);
@@ -166,10 +268,7 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
                }
        }
 
-       reuse->socks[reuse->num_socks] = sk;
-       /* paired with smp_rmb() in reuseport_select_sock() */
-       smp_wmb();
-       reuse->num_socks++;
+       __reuseport_add_sock(sk, reuse);
        rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
 
        spin_unlock_bh(&reuseport_lock);
@@ -180,15 +279,77 @@ int reuseport_add_sock(struct sock *sk, struct sock *sk2, bool bind_inany)
 }
 EXPORT_SYMBOL(reuseport_add_sock);
 
+static int reuseport_resurrect(struct sock *sk, struct sock_reuseport *old_reuse,
+                              struct sock_reuseport *reuse, bool bind_inany)
+{
+       if (old_reuse == reuse) {
+               /* If sk was in the same reuseport group, just pop sk out of
+                * the closed section and push sk into the listening section.
+                */
+               __reuseport_detach_closed_sock(sk, old_reuse);
+               __reuseport_add_sock(sk, old_reuse);
+               return 0;
+       }
+
+       if (!reuse) {
+               /* In bind()/listen() path, we cannot carry over the eBPF prog
+                * for the shutdown()ed socket. In setsockopt() path, we should
+                * not change the eBPF prog of listening sockets by attaching a
+                * prog to the shutdown()ed socket. Thus, we will allocate a new
+                * reuseport group and detach sk from the old group.
+                */
+               int id;
+
+               reuse = __reuseport_alloc(INIT_SOCKS);
+               if (!reuse)
+                       return -ENOMEM;
+
+               id = ida_alloc(&reuseport_ida, GFP_ATOMIC);
+               if (id < 0) {
+                       kfree(reuse);
+                       return id;
+               }
+
+               reuse->reuseport_id = id;
+               reuse->bind_inany = bind_inany;
+       } else {
+               /* Move sk from the old group to the new one if
+                * - all the other listeners in the old group were close()d or
+                *   shutdown()ed, and then sk2 has listen()ed on the same port
+                * OR
+                * - sk listen()ed without bind() (or with autobind), was
+                *   shutdown()ed, and then listen()s on another port which
+                *   sk2 listen()s on.
+                */
+               if (reuse->num_socks + reuse->num_closed_socks == reuse->max_socks) {
+                       reuse = reuseport_grow(reuse);
+                       if (!reuse)
+                               return -ENOMEM;
+               }
+       }
+
+       __reuseport_detach_closed_sock(sk, old_reuse);
+       __reuseport_add_sock(sk, reuse);
+       rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+
+       if (old_reuse->num_socks + old_reuse->num_closed_socks == 0)
+               call_rcu(&old_reuse->rcu, reuseport_free_rcu);
+
+       return 0;
+}
+
 void reuseport_detach_sock(struct sock *sk)
 {
        struct sock_reuseport *reuse;
-       int i;
 
        spin_lock_bh(&reuseport_lock);
        reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock));
 
+       /* reuseport_grow() has detached a closed sk */
+       if (!reuse)
+               goto out;
+
        /* Notify the bpf side. The sk may be added to a sockarray
         * map. If so, sockarray logic will remove it from the map.
         *
@@ -201,19 +362,52 @@ void reuseport_detach_sock(struct sock *sk)
 
        rcu_assign_pointer(sk->sk_reuseport_cb, NULL);
 
-       for (i = 0; i < reuse->num_socks; i++) {
-               if (reuse->socks[i] == sk) {
-                       reuse->socks[i] = reuse->socks[reuse->num_socks - 1];
-                       reuse->num_socks--;
-                       if (reuse->num_socks == 0)
-                               call_rcu(&reuse->rcu, reuseport_free_rcu);
-                       break;
-               }
-       }
+       if (!__reuseport_detach_closed_sock(sk, reuse))
+               __reuseport_detach_sock(sk, reuse);
+
+       if (reuse->num_socks + reuse->num_closed_socks == 0)
+               call_rcu(&reuse->rcu, reuseport_free_rcu);
+
+out:
        spin_unlock_bh(&reuseport_lock);
 }
 EXPORT_SYMBOL(reuseport_detach_sock);
 
+void reuseport_stop_listen_sock(struct sock *sk)
+{
+       if (sk->sk_protocol == IPPROTO_TCP) {
+               struct sock_reuseport *reuse;
+               struct bpf_prog *prog;
+
+               spin_lock_bh(&reuseport_lock);
+
+               reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
+                                                 lockdep_is_held(&reuseport_lock));
+               prog = rcu_dereference_protected(reuse->prog,
+                                                lockdep_is_held(&reuseport_lock));
+
+               if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+                   (prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
+                       /* Migration capable, move sk from the listening section
+                        * to the closed section.
+                        */
+                       bpf_sk_reuseport_detach(sk);
+
+                       __reuseport_detach_sock(sk, reuse);
+                       __reuseport_add_closed_sock(sk, reuse);
+
+                       spin_unlock_bh(&reuseport_lock);
+                       return;
+               }
+
+               spin_unlock_bh(&reuseport_lock);
+       }
+
+       /* Not capable of migration; detach immediately */
+       reuseport_detach_sock(sk);
+}
+EXPORT_SYMBOL(reuseport_stop_listen_sock);
+
 static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
                                   struct bpf_prog *prog, struct sk_buff *skb,
                                   int hdr_len)
@@ -244,6 +438,23 @@ static struct sock *run_bpf_filter(struct sock_reuseport *reuse, u16 socks,
        return reuse->socks[index];
 }
 
+static struct sock *reuseport_select_sock_by_hash(struct sock_reuseport *reuse,
+                                                 u32 hash, u16 num_socks)
+{
+       int i, j;
+
+       i = j = reciprocal_scale(hash, num_socks);
+       while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
+               i++;
+               if (i >= num_socks)
+                       i = 0;
+               if (i == j)
+                       return NULL;
+       }
+
+       return reuse->socks[i];
+}
+
 /**
  *  reuseport_select_sock - Select a socket from an SO_REUSEPORT group.
  *  @sk: First socket in the group.
@@ -274,32 +485,21 @@ struct sock *reuseport_select_sock(struct sock *sk,
        prog = rcu_dereference(reuse->prog);
        socks = READ_ONCE(reuse->num_socks);
        if (likely(socks)) {
-               /* paired with smp_wmb() in reuseport_add_sock() */
+               /* paired with smp_wmb() in __reuseport_add_sock() */
                smp_rmb();
 
                if (!prog || !skb)
                        goto select_by_hash;
 
                if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
-                       sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, hash);
+                       sk2 = bpf_run_sk_reuseport(reuse, sk, prog, skb, NULL, hash);
                else
                        sk2 = run_bpf_filter(reuse, socks, prog, skb, hdr_len);
 
 select_by_hash:
                /* no bpf or invalid bpf result: fall back to hash usage */
-               if (!sk2) {
-                       int i, j;
-
-                       i = j = reciprocal_scale(hash, socks);
-                       while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) {
-                               i++;
-                               if (i >= socks)
-                                       i = 0;
-                               if (i == j)
-                                       goto out;
-                       }
-                       sk2 = reuse->socks[i];
-               }
+               if (!sk2)
+                       sk2 = reuseport_select_sock_by_hash(reuse, hash, socks);
        }
 
 out:
@@ -308,14 +508,84 @@ out:
 }
 EXPORT_SYMBOL(reuseport_select_sock);
 
+/**
+ *  reuseport_migrate_sock - Select a socket from an SO_REUSEPORT group.
+ *  @sk: close()ed or shutdown()ed socket in the group.
+ *  @migrating_sk: ESTABLISHED/SYN_RECV full socket in the accept queue or
+ *    NEW_SYN_RECV request socket during 3WHS.
+ *  @skb: skb to run through BPF filter.
+ *  Returns a socket (with sk_refcnt +1) that should accept the child socket
+ *  (or NULL on error).
+ */
+struct sock *reuseport_migrate_sock(struct sock *sk,
+                                   struct sock *migrating_sk,
+                                   struct sk_buff *skb)
+{
+       struct sock_reuseport *reuse;
+       struct sock *nsk = NULL;
+       bool allocated = false;
+       struct bpf_prog *prog;
+       u16 socks;
+       u32 hash;
+
+       rcu_read_lock();
+
+       reuse = rcu_dereference(sk->sk_reuseport_cb);
+       if (!reuse)
+               goto out;
+
+       socks = READ_ONCE(reuse->num_socks);
+       if (unlikely(!socks))
+               goto out;
+
+       /* paired with smp_wmb() in __reuseport_add_sock() */
+       smp_rmb();
+
+       hash = migrating_sk->sk_hash;
+       prog = rcu_dereference(reuse->prog);
+       if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
+               if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+                       goto select_by_hash;
+               goto out;
+       }
+
+       if (!skb) {
+               skb = alloc_skb(0, GFP_ATOMIC);
+               if (!skb)
+                       goto out;
+               allocated = true;
+       }
+
+       nsk = bpf_run_sk_reuseport(reuse, sk, prog, skb, migrating_sk, hash);
+
+       if (allocated)
+               kfree_skb(skb);
+
+select_by_hash:
+       if (!nsk)
+               nsk = reuseport_select_sock_by_hash(reuse, hash, socks);
+
+       if (IS_ERR_OR_NULL(nsk) || unlikely(!refcount_inc_not_zero(&nsk->sk_refcnt)))
+               nsk = NULL;
+
+out:
+       rcu_read_unlock();
+       return nsk;
+}
+EXPORT_SYMBOL(reuseport_migrate_sock);
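Migration is triggered either by the net.ipv4.tcp_migrate_req sysctl added later in this diff or by a BPF_SK_REUSEPORT_SELECT_OR_MIGRATE program. A hedged sketch of such a program, assuming the SEC("sk_reuseport/migrate") convention used by the accompanying libbpf and selftest changes:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
            __uint(max_entries, 256);
            __type(key, int);
            __type(value, __u64);
    } reuseport_map SEC(".maps");

    SEC("sk_reuseport/migrate")
    int select_or_migrate(struct sk_reuseport_md *md)
    {
            int key = 0;

            /* migrating_sk is NULL for ordinary SYN-time selection and
             * non-NULL when a child or request socket is being moved off
             * a close()d or shutdown()ed listener.
             */
            if (!md->migrating_sk)
                    return SK_PASS; /* fall back to hash-based selection */

            if (bpf_sk_select_reuseport(md, &reuseport_map, &key, 0))
                    return SK_DROP; /* refuse this migration */

            return SK_PASS;
    }

    char _license[] SEC("license") = "GPL";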
+
 int reuseport_attach_prog(struct sock *sk, struct bpf_prog *prog)
 {
        struct sock_reuseport *reuse;
        struct bpf_prog *old_prog;
 
-       if (sk_unhashed(sk) && sk->sk_reuseport) {
-               int err = reuseport_alloc(sk, false);
+       if (sk_unhashed(sk)) {
+               int err;
 
+               if (!sk->sk_reuseport)
+                       return -EINVAL;
+
+               err = reuseport_alloc(sk, false);
                if (err)
                        return err;
        } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
@@ -341,13 +611,24 @@ int reuseport_detach_prog(struct sock *sk)
        struct sock_reuseport *reuse;
        struct bpf_prog *old_prog;
 
-       if (!rcu_access_pointer(sk->sk_reuseport_cb))
-               return sk->sk_reuseport ? -ENOENT : -EINVAL;
-
        old_prog = NULL;
        spin_lock_bh(&reuseport_lock);
        reuse = rcu_dereference_protected(sk->sk_reuseport_cb,
                                          lockdep_is_held(&reuseport_lock));
+
+       /* reuse must be checked after acquiring the reuseport_lock
+        * because reuseport_grow() can detach a closed sk.
+        */
+       if (!reuse) {
+               spin_unlock_bh(&reuseport_lock);
+               return sk->sk_reuseport ? -ENOENT : -EINVAL;
+       }
+
+       if (sk_unhashed(sk) && reuse->num_closed_socks) {
+               spin_unlock_bh(&reuseport_lock);
+               return -ENOENT;
+       }
+
        old_prog = rcu_replace_pointer(reuse->prog, old_prog,
                                       lockdep_is_held(&reuseport_lock));
        spin_unlock_bh(&reuseport_lock);
index 858276e..725d20f 100644 (file)
@@ -584,3 +584,31 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
        return __xdp_build_skb_from_frame(xdpf, skb, dev);
 }
 EXPORT_SYMBOL_GPL(xdp_build_skb_from_frame);
+
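+/* Deep-copy an xdp_frame (the struct itself, headroom and data) into a
+ * freshly allocated order-0 page; the devmap broadcast path uses this to
+ * hand each destination its own private copy of the frame.
+ */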
+struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
+{
+       unsigned int headroom, totalsize;
+       struct xdp_frame *nxdpf;
+       struct page *page;
+       void *addr;
+
+       headroom = xdpf->headroom + sizeof(*xdpf);
+       totalsize = headroom + xdpf->len;
+
+       if (unlikely(totalsize > PAGE_SIZE))
+               return NULL;
+       page = dev_alloc_page();
+       if (!page)
+               return NULL;
+       addr = page_to_virt(page);
+
+       memcpy(addr, xdpf, totalsize);
+
+       nxdpf = addr;
+       nxdpf->data = addr + headroom;
+       nxdpf->frame_sz = PAGE_SIZE;
+       nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
+       nxdpf->mem.id = 0;
+
+       return nxdpf;
+}
index fd472ea..0eea878 100644 (file)
@@ -135,10 +135,18 @@ static int inet_csk_bind_conflict(const struct sock *sk,
                                  bool relax, bool reuseport_ok)
 {
        struct sock *sk2;
+       bool reuseport_cb_ok;
        bool reuse = sk->sk_reuse;
        bool reuseport = !!sk->sk_reuseport;
+       struct sock_reuseport *reuseport_cb;
        kuid_t uid = sock_i_uid((struct sock *)sk);
 
+       rcu_read_lock();
+       reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
+       /* paired with WRITE_ONCE() in __reuseport_(add|detach)_closed_sock */
+       reuseport_cb_ok = !reuseport_cb || READ_ONCE(reuseport_cb->num_closed_socks);
+       rcu_read_unlock();
+
        /*
         * Unlike other sk lookup places we do not check
         * for sk_net here, since _all_ the socks listed
@@ -156,14 +164,14 @@ static int inet_csk_bind_conflict(const struct sock *sk,
                                if ((!relax ||
                                     (!reuseport_ok &&
                                      reuseport && sk2->sk_reuseport &&
-                                     !rcu_access_pointer(sk->sk_reuseport_cb) &&
+                                     reuseport_cb_ok &&
                                      (sk2->sk_state == TCP_TIME_WAIT ||
                                       uid_eq(uid, sock_i_uid(sk2))))) &&
                                    inet_rcv_saddr_equal(sk, sk2, true))
                                        break;
                        } else if (!reuseport_ok ||
                                   !reuseport || !sk2->sk_reuseport ||
-                                  rcu_access_pointer(sk->sk_reuseport_cb) ||
+                                  !reuseport_cb_ok ||
                                   (sk2->sk_state != TCP_TIME_WAIT &&
                                    !uid_eq(uid, sock_i_uid(sk2)))) {
                                if (inet_rcv_saddr_equal(sk, sk2, true))
@@ -687,6 +695,64 @@ int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
 }
 EXPORT_SYMBOL(inet_rtx_syn_ack);
 
+static struct request_sock *inet_reqsk_clone(struct request_sock *req,
+                                            struct sock *sk)
+{
+       struct sock *req_sk, *nreq_sk;
+       struct request_sock *nreq;
+
+       nreq = kmem_cache_alloc(req->rsk_ops->slab, GFP_ATOMIC | __GFP_NOWARN);
+       if (!nreq) {
+               /* paired with refcount_inc_not_zero() in reuseport_migrate_sock() */
+               sock_put(sk);
+               return NULL;
+       }
+
+       req_sk = req_to_sk(req);
+       nreq_sk = req_to_sk(nreq);
+
+       memcpy(nreq_sk, req_sk,
+              offsetof(struct sock, sk_dontcopy_begin));
+       memcpy(&nreq_sk->sk_dontcopy_end, &req_sk->sk_dontcopy_end,
+              req->rsk_ops->obj_size - offsetof(struct sock, sk_dontcopy_end));
+
+       sk_node_init(&nreq_sk->sk_node);
+       nreq_sk->sk_tx_queue_mapping = req_sk->sk_tx_queue_mapping;
+#ifdef CONFIG_XPS
+       nreq_sk->sk_rx_queue_mapping = req_sk->sk_rx_queue_mapping;
+#endif
+       nreq_sk->sk_incoming_cpu = req_sk->sk_incoming_cpu;
+
+       nreq->rsk_listener = sk;
+
+       /* We need not acquire fastopenq->lock
+        * because the child socket is locked in inet_csk_listen_stop().
+        */
+       if (sk->sk_protocol == IPPROTO_TCP && tcp_rsk(nreq)->tfo_listener)
+               rcu_assign_pointer(tcp_sk(nreq->sk)->fastopen_rsk, nreq);
+
+       return nreq;
+}
+
+static void reqsk_queue_migrated(struct request_sock_queue *queue,
+                                const struct request_sock *req)
+{
+       if (req->num_timeout == 0)
+               atomic_inc(&queue->young);
+       atomic_inc(&queue->qlen);
+}
+
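+/* A migrated req shares saved_syn and the IP option blobs with its clone;
+ * clear the pointers on whichever copy is being released so the memory is
+ * not freed twice.
+ */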
+static void reqsk_migrate_reset(struct request_sock *req)
+{
+       req->saved_syn = NULL;
+#if IS_ENABLED(CONFIG_IPV6)
+       inet_rsk(req)->ipv6_opt = NULL;
+       inet_rsk(req)->pktopts = NULL;
+#else
+       inet_rsk(req)->ireq_opt = NULL;
+#endif
+}
+
 /* return true if req was found in the ehash table */
 static bool reqsk_queue_unlink(struct request_sock *req)
 {
@@ -727,15 +793,39 @@ EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put);
 static void reqsk_timer_handler(struct timer_list *t)
 {
        struct request_sock *req = from_timer(req, t, rsk_timer);
+       struct request_sock *nreq = NULL, *oreq = req;
        struct sock *sk_listener = req->rsk_listener;
-       struct net *net = sock_net(sk_listener);
-       struct inet_connection_sock *icsk = inet_csk(sk_listener);
-       struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+       struct inet_connection_sock *icsk;
+       struct request_sock_queue *queue;
+       struct net *net;
        int max_syn_ack_retries, qlen, expire = 0, resend = 0;
 
-       if (inet_sk_state_load(sk_listener) != TCP_LISTEN)
-               goto drop;
+       if (inet_sk_state_load(sk_listener) != TCP_LISTEN) {
+               struct sock *nsk;
+
+               nsk = reuseport_migrate_sock(sk_listener, req_to_sk(req), NULL);
+               if (!nsk)
+                       goto drop;
+
+               nreq = inet_reqsk_clone(req, nsk);
+               if (!nreq)
+                       goto drop;
 
+               /* The new timer for the cloned req can drop the initial
+                * refcount of 2 by calling inet_csk_reqsk_queue_drop_and_put(),
+                * so hold another count to prevent use-after-free and
+                * call reqsk_put() just before return.
+                */
+               refcount_set(&nreq->rsk_refcnt, 2 + 1);
+               timer_setup(&nreq->rsk_timer, reqsk_timer_handler, TIMER_PINNED);
+               reqsk_queue_migrated(&inet_csk(nsk)->icsk_accept_queue, req);
+
+               req = nreq;
+               sk_listener = nsk;
+       }
+
+       icsk = inet_csk(sk_listener);
+       net = sock_net(sk_listener);
        max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
        /* Normally all the openreqs are young and become mature
         * (i.e. converted to established socket) for first timeout.
@@ -754,6 +844,7 @@ static void reqsk_timer_handler(struct timer_list *t)
         * embrions; and abort old ones without pity, if old
         * ones are about to clog our table.
         */
+       queue = &icsk->icsk_accept_queue;
        qlen = reqsk_queue_len(queue);
        if ((qlen << 1) > max(8U, READ_ONCE(sk_listener->sk_max_ack_backlog))) {
                int young = reqsk_queue_len_young(queue) << 1;
@@ -778,10 +869,36 @@ static void reqsk_timer_handler(struct timer_list *t)
                        atomic_dec(&queue->young);
                timeo = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX);
                mod_timer(&req->rsk_timer, jiffies + timeo);
+
+               if (!nreq)
+                       return;
+
+               if (!inet_ehash_insert(req_to_sk(nreq), req_to_sk(oreq), NULL)) {
+                       /* delete timer */
+                       inet_csk_reqsk_queue_drop(sk_listener, nreq);
+                       goto drop;
+               }
+
+               reqsk_migrate_reset(oreq);
+               reqsk_queue_removed(&inet_csk(oreq->rsk_listener)->icsk_accept_queue, oreq);
+               reqsk_put(oreq);
+
+               reqsk_put(nreq);
                return;
        }
+
 drop:
-       inet_csk_reqsk_queue_drop_and_put(sk_listener, req);
+       /* Even if we can clone the req, we may not need to retransmit any more
+        * SYN+ACKs (nreq->num_timeout > max_syn_ack_retries, etc), or another
+        * CPU may win the "own_req" race so that inet_ehash_insert() fails.
+        */
+       if (nreq) {
+               reqsk_migrate_reset(nreq);
+               reqsk_queue_removed(queue, nreq);
+               __reqsk_free(nreq);
+       }
+
+       inet_csk_reqsk_queue_drop_and_put(oreq->rsk_listener, oreq);
 }
 
 static void reqsk_queue_hash_req(struct request_sock *req,
@@ -997,12 +1114,40 @@ struct sock *inet_csk_complete_hashdance(struct sock *sk, struct sock *child,
                                         struct request_sock *req, bool own_req)
 {
        if (own_req) {
-               inet_csk_reqsk_queue_drop(sk, req);
-               reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
-               if (inet_csk_reqsk_queue_add(sk, req, child))
+               inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+               reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
+
+               if (sk != req->rsk_listener) {
+                       /* another listening sk has been selected,
+                        * migrate the req to it.
+                        */
+                       struct request_sock *nreq;
+
+                       /* hold a refcnt for the nreq->rsk_listener
+                        * which is assigned in inet_reqsk_clone()
+                        */
+                       sock_hold(sk);
+                       nreq = inet_reqsk_clone(req, sk);
+                       if (!nreq) {
+                               inet_child_forget(sk, req, child);
+                               goto child_put;
+                       }
+
+                       refcount_set(&nreq->rsk_refcnt, 1);
+                       if (inet_csk_reqsk_queue_add(sk, nreq, child)) {
+                               reqsk_migrate_reset(req);
+                               reqsk_put(req);
+                               return child;
+                       }
+
+                       reqsk_migrate_reset(nreq);
+                       __reqsk_free(nreq);
+               } else if (inet_csk_reqsk_queue_add(sk, req, child)) {
                        return child;
+               }
        }
        /* Too bad, another child took ownership of the request, undo. */
+child_put:
        bh_unlock_sock(child);
        sock_put(child);
        return NULL;
@@ -1028,14 +1173,36 @@ void inet_csk_listen_stop(struct sock *sk)
         * of the variants now.                 --ANK
         */
        while ((req = reqsk_queue_remove(queue, sk)) != NULL) {
-               struct sock *child = req->sk;
+               struct sock *child = req->sk, *nsk;
+               struct request_sock *nreq;
 
                local_bh_disable();
                bh_lock_sock(child);
                WARN_ON(sock_owned_by_user(child));
                sock_hold(child);
 
+               nsk = reuseport_migrate_sock(sk, child, NULL);
+               if (nsk) {
+                       nreq = inet_reqsk_clone(req, nsk);
+                       if (nreq) {
+                               refcount_set(&nreq->rsk_refcnt, 1);
+
+                               if (inet_csk_reqsk_queue_add(nsk, nreq, child)) {
+                                       reqsk_migrate_reset(req);
+                               } else {
+                                       reqsk_migrate_reset(nreq);
+                                       __reqsk_free(nreq);
+                               }
+
+                               /* inet_csk_reqsk_queue_add() has already
+                                * called inet_child_forget() on failure case.
+                                */
+                               goto skip_child_forget;
+                       }
+               }
+
                inet_child_forget(sk, req, child);
+skip_child_forget:
                reqsk_put(req);
                bh_unlock_sock(child);
                local_bh_enable();
index c96866a..80aeaf9 100644 (file)
@@ -697,7 +697,7 @@ void inet_unhash(struct sock *sk)
                goto unlock;
 
        if (rcu_access_pointer(sk->sk_reuseport_cb))
-               reuseport_detach_sock(sk);
+               reuseport_stop_listen_sock(sk);
        if (ilb) {
                inet_unhash2(hashinfo, sk);
                ilb->count--;
index 4fa77f1..6f1e64d 100644 (file)
@@ -961,6 +961,15 @@ static struct ctl_table ipv4_net_table[] = {
        },
 #endif
        {
+               .procname       = "tcp_migrate_req",
+               .data           = &init_net.ipv4.sysctl_tcp_migrate_req,
+               .maxlen         = sizeof(u8),
+               .mode           = 0644,
+               .proc_handler   = proc_dou8vec_minmax,
+               .extra1         = SYSCTL_ZERO,
+               .extra2         = SYSCTL_ONE
+       },
+       {
                .procname       = "tcp_reordering",
                .data           = &init_net.ipv4.sysctl_tcp_reordering,
                .maxlen         = sizeof(int),
index 4f5b68a..6cb8e26 100644 (file)
@@ -2002,13 +2002,21 @@ process:
                        goto csum_error;
                }
                if (unlikely(sk->sk_state != TCP_LISTEN)) {
-                       inet_csk_reqsk_queue_drop_and_put(sk, req);
-                       goto lookup;
+                       nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+                       if (!nsk) {
+                               inet_csk_reqsk_queue_drop_and_put(sk, req);
+                               goto lookup;
+                       }
+                       sk = nsk;
+                       /* reuseport_migrate_sock() has already taken one sk_refcnt
+                        * before returning.
+                        */
+               } else {
+                       /* We own a reference on the listener, increase it again
+                        * as we might lose it too soon.
+                        */
+                       sock_hold(sk);
                }
-               /* We own a reference on the listener, increase it again
-                * as we might lose it too soon.
-                */
-               sock_hold(sk);
                refcounted = true;
                nsk = NULL;
                if (!tcp_filter(sk, skb)) {
index 7513ba4..f258a4c 100644 (file)
@@ -775,8 +775,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                goto listen_overflow;
 
        if (own_req && rsk_drop_req(req)) {
-               reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
-               inet_csk_reqsk_queue_drop_and_put(sk, req);
+               reqsk_queue_removed(&inet_csk(req->rsk_listener)->icsk_accept_queue, req);
+               inet_csk_reqsk_queue_drop_and_put(req->rsk_listener, req);
                return child;
        }
 
index 4435fa3..4d71464 100644 (file)
@@ -1664,10 +1664,18 @@ process:
                        goto csum_error;
                }
                if (unlikely(sk->sk_state != TCP_LISTEN)) {
-                       inet_csk_reqsk_queue_drop_and_put(sk, req);
-                       goto lookup;
+                       nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+                       if (!nsk) {
+                               inet_csk_reqsk_queue_drop_and_put(sk, req);
+                               goto lookup;
+                       }
+                       sk = nsk;
+                       /* reuseport_migrate_sock() already holds one sk_refcnt
+                        * on the returned socket.
+                        */
+               } else {
+                       sock_hold(sk);
                }
-               sock_hold(sk);
                refcounted = true;
                nsk = NULL;
                if (!tcp_filter(sk, skb)) {
index 56a28a6..f01ef6b 100644 (file)
@@ -27,7 +27,7 @@ static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
        unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
 
-       kfree(umem->pgs);
+       kvfree(umem->pgs);
        umem->pgs = NULL;
 }
 
@@ -99,8 +99,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
        long npgs;
        int err;
 
-       umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
-                           GFP_KERNEL | __GFP_NOWARN);
+       umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
        if (!umem->pgs)
                return -ENOMEM;
 
@@ -123,7 +122,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
 out_pin:
        xdp_umem_unpin_pages(umem);
 out_pgs:
-       kfree(umem->pgs);
+       kvfree(umem->pgs);
        umem->pgs = NULL;
        return err;
 }
index 67b4ce5..9df75ea 100644 (file)
@@ -226,7 +226,8 @@ static int xsk_map_delete_elem(struct bpf_map *map, void *key)
 
 static int xsk_map_redirect(struct bpf_map *map, u32 ifindex, u64 flags)
 {
-       return __bpf_xdp_redirect_map(map, ifindex, flags, __xsk_map_lookup_elem);
+       return __bpf_xdp_redirect_map(map, ifindex, flags, 0,
+                                     __xsk_map_lookup_elem);
 }
 
 void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
index 45ceca4..520434e 100644 (file)
@@ -41,6 +41,7 @@ tprogs-y += test_map_in_map
 tprogs-y += per_socket_stats_example
 tprogs-y += xdp_redirect
 tprogs-y += xdp_redirect_map
+tprogs-y += xdp_redirect_map_multi
 tprogs-y += xdp_redirect_cpu
 tprogs-y += xdp_monitor
 tprogs-y += xdp_rxq_info
@@ -99,6 +100,7 @@ test_map_in_map-objs := test_map_in_map_user.o
 per_socket_stats_example-objs := cookie_uid_helper_example.o
 xdp_redirect-objs := xdp_redirect_user.o
 xdp_redirect_map-objs := xdp_redirect_map_user.o
+xdp_redirect_map_multi-objs := xdp_redirect_map_multi_user.o
 xdp_redirect_cpu-objs := xdp_redirect_cpu_user.o
 xdp_monitor-objs := xdp_monitor_user.o
 xdp_rxq_info-objs := xdp_rxq_info_user.o
@@ -160,6 +162,7 @@ always-y += tcp_tos_reflect_kern.o
 always-y += tcp_dumpstats_kern.o
 always-y += xdp_redirect_kern.o
 always-y += xdp_redirect_map_kern.o
+always-y += xdp_redirect_map_multi_kern.o
 always-y += xdp_redirect_cpu_kern.o
 always-y += xdp_monitor_kern.o
 always-y += xdp_rxq_info_kern.o
index 26dcd4d..9b19323 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 
-/**
+/*
  * ibumad BPF sample kernel side
  *
  * This program is free software; you can redistribute it and/or
index d83d810..0746ca5 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 
-/**
+/*
  * ibumad BPF sample user side
  *
  * This program is free software; you can redistribute it and/or
index 74a4583..0006126 100644 (file)
@@ -67,6 +67,8 @@ static void usage(const char *prog)
                "usage: %s [OPTS] interface-list\n"
                "\nOPTS:\n"
                "    -d    detach program\n"
+               "    -S    use skb-mode\n"
+               "    -F    force loading prog\n"
                "    -D    direct table lookups (skip fib rules)\n",
                prog);
 }
diff --git a/samples/bpf/xdp_redirect_map_multi_kern.c b/samples/bpf/xdp_redirect_map_multi_kern.c
new file mode 100644 (file)
index 0000000..71aa23d
--- /dev/null
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+       __uint(max_entries, 32);
+} forward_map_general SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 32);
+} forward_map_native SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+       __type(key, u32);
+       __type(value, long);
+       __uint(max_entries, 1);
+} rxcnt SEC(".maps");
+
+/* map to store egress interface mac addresses; set
+ * max_entries to 1 and extend it in the user space prog.
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, u32);
+       __type(value, __be64);
+       __uint(max_entries, 1);
+} mac_map SEC(".maps");
+
+static int xdp_redirect_map(struct xdp_md *ctx, void *forward_map)
+{
+       long *value;
+       u32 key = 0;
+
+       /* count packet in global counter */
+       value = bpf_map_lookup_elem(&rxcnt, &key);
+       if (value)
+               *value += 1;
+
+       return bpf_redirect_map(forward_map, key,
+                               BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp_redirect_general")
+int xdp_redirect_map_general(struct xdp_md *ctx)
+{
+       return xdp_redirect_map(ctx, &forward_map_general);
+}
+
+SEC("xdp_redirect_native")
+int xdp_redirect_map_native(struct xdp_md *ctx)
+{
+       return xdp_redirect_map(ctx, &forward_map_native);
+}
+
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       u32 key = ctx->egress_ifindex;
+       struct ethhdr *eth = data;
+       __be64 *mac;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       mac = bpf_map_lookup_elem(&mac_map, &key);
+       if (mac)
+               __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_map_multi_user.c b/samples/bpf/xdp_redirect_map_multi_user.c
new file mode 100644 (file)
index 0000000..84cdbbe
--- /dev/null
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+static int rxcnt_map_fd;
+
+static void int_exit(int sig)
+{
+       __u32 prog_id = 0;
+       int i;
+
+       for (i = 0; ifaces[i] > 0; i++) {
+               if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+                       printf("bpf_get_link_xdp_id failed\n");
+                       exit(1);
+               }
+               if (prog_id)
+                       bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+       }
+
+       exit(0);
+}
+
+static void poll_stats(int interval)
+{
+       unsigned int nr_cpus = bpf_num_possible_cpus();
+       __u64 values[nr_cpus], prev[nr_cpus];
+
+       memset(prev, 0, sizeof(prev));
+
+       while (1) {
+               __u64 sum = 0;
+               __u32 key = 0;
+               int i;
+
+               sleep(interval);
+               assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
+               for (i = 0; i < nr_cpus; i++)
+                       sum += (values[i] - prev[i]);
+               if (sum)
+                       printf("Forwarding %10llu pkt/s\n", sum / interval);
+               memcpy(prev, values, sizeof(values));
+       }
+}
+
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+       char ifname[IF_NAMESIZE];
+       struct ifreq ifr;
+       int fd, ret = -1;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0)
+               return ret;
+
+       if (!if_indextoname(ifindex, ifname))
+               goto err_out;
+
+       strcpy(ifr.ifr_name, ifname);
+
+       if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
+               goto err_out;
+
+       memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+       ret = 0;
+
+err_out:
+       close(fd);
+       return ret;
+}
+
+static int update_mac_map(struct bpf_object *obj)
+{
+       int i, ret = -1, mac_map_fd;
+       unsigned char mac_addr[6];
+       unsigned int ifindex;
+
+       mac_map_fd = bpf_object__find_map_fd_by_name(obj, "mac_map");
+       if (mac_map_fd < 0) {
+               printf("find mac map fd failed\n");
+               return ret;
+       }
+
+       for (i = 0; ifaces[i] > 0; i++) {
+               ifindex = ifaces[i];
+
+               ret = get_mac_addr(ifindex, mac_addr);
+               if (ret < 0) {
+                       printf("get interface %d mac failed\n", ifindex);
+                       return ret;
+               }
+
+               ret = bpf_map_update_elem(mac_map_fd, &ifindex, mac_addr, 0);
+               if (ret) {
+                       perror("bpf_update_elem mac_map_fd");
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+static void usage(const char *prog)
+{
+       fprintf(stderr,
+               "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
+               "OPTS:\n"
+               "    -S    use skb-mode\n"
+               "    -N    enforce native mode\n"
+               "    -F    force loading prog\n"
+               "    -X    load xdp program on egress\n",
+               prog);
+}
+
+int main(int argc, char **argv)
+{
+       int i, ret, opt, forward_map_fd, max_ifindex = 0;
+       struct bpf_program *ingress_prog, *egress_prog;
+       int ingress_prog_fd, egress_prog_fd = 0;
+       struct bpf_devmap_val devmap_val;
+       bool attach_egress_prog = false;
+       char ifname[IF_NAMESIZE];
+       struct bpf_map *mac_map;
+       struct bpf_object *obj;
+       unsigned int ifindex;
+       char filename[256];
+
+       while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+               switch (opt) {
+               case 'S':
+                       xdp_flags |= XDP_FLAGS_SKB_MODE;
+                       break;
+               case 'N':
+                       /* default, set below */
+                       break;
+               case 'F':
+                       xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+                       break;
+               case 'X':
+                       attach_egress_prog = true;
+                       break;
+               default:
+                       usage(basename(argv[0]));
+                       return 1;
+               }
+       }
+
+       if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+               xdp_flags |= XDP_FLAGS_DRV_MODE;
+       } else if (attach_egress_prog) {
+               printf("Loading XDP program on egress is not supported in SKB mode yet\n");
+               return 1;
+       }
+
+       if (optind == argc) {
+               printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+               return 1;
+       }
+
+       printf("Get interfaces");
+       for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+               ifaces[i] = if_nametoindex(argv[optind + i]);
+               if (!ifaces[i])
+                       ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+               if (!if_indextoname(ifaces[i], ifname)) {
+                       perror("Invalid interface name or index");
+                       return 1;
+               }
+
+               /* Find the largest index number */
+               if (ifaces[i] > max_ifindex)
+                       max_ifindex = ifaces[i];
+
+               printf(" %d", ifaces[i]);
+       }
+       printf("\n");
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+       obj = bpf_object__open(filename);
+       if (libbpf_get_error(obj)) {
+               printf("ERROR: opening BPF object file failed\n");
+               obj = NULL;
+               goto err_out;
+       }
+
+       /* Reset the map size to max ifindex + 1 */
+       if (attach_egress_prog) {
+               mac_map = bpf_object__find_map_by_name(obj, "mac_map");
+               ret = bpf_map__resize(mac_map, max_ifindex + 1);
+               if (ret < 0) {
+                       printf("ERROR: reset mac map size failed\n");
+                       goto err_out;
+               }
+       }
+
+       /* load BPF program */
+       if (bpf_object__load(obj)) {
+               printf("ERROR: loading BPF object file failed\n");
+               goto err_out;
+       }
+
+       if (xdp_flags & XDP_FLAGS_SKB_MODE) {
+               ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_general");
+               forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_general");
+       } else {
+               ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_native");
+               forward_map_fd = bpf_object__find_map_fd_by_name(obj, "forward_map_native");
+       }
+       if (!ingress_prog || forward_map_fd < 0) {
+               printf("finding ingress_prog/forward_map in obj file failed\n");
+               goto err_out;
+       }
+
+       ingress_prog_fd = bpf_program__fd(ingress_prog);
+       if (ingress_prog_fd < 0) {
+               printf("find ingress_prog fd failed\n");
+               goto err_out;
+       }
+
+       rxcnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rxcnt");
+       if (rxcnt_map_fd < 0) {
+               printf("bpf_object__find_map_fd_by_name failed\n");
+               goto err_out;
+       }
+
+       if (attach_egress_prog) {
+               /* Update mac_map with all egress interfaces' mac addr */
+               if (update_mac_map(obj) < 0) {
+                       printf("Error: update mac map failed\n");
+                       goto err_out;
+               }
+
+               /* Find egress prog fd */
+               egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+               if (!egress_prog) {
+                       printf("finding egress_prog in obj file failed\n");
+                       goto err_out;
+               }
+               egress_prog_fd = bpf_program__fd(egress_prog);
+               if (egress_prog_fd < 0) {
+                       printf("find egress_prog fd failed\n");
+                       goto err_out;
+               }
+       }
+
+       /* Remove the attached program when this program is interrupted or killed */
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+
+       /* Init forward multicast groups */
+       for (i = 0; ifaces[i] > 0; i++) {
+               ifindex = ifaces[i];
+
+               /* bind prog_fd to each interface */
+               ret = bpf_set_link_xdp_fd(ifindex, ingress_prog_fd, xdp_flags);
+               if (ret) {
+                       printf("Set xdp fd failed on %d\n", ifindex);
+                       goto err_out;
+               }
+
+               /* Add all the interfaces to the forward group and attach
+                * the egress devmap program if it exists
+                */
+               devmap_val.ifindex = ifindex;
+               devmap_val.bpf_prog.fd = egress_prog_fd;
+               ret = bpf_map_update_elem(forward_map_fd, &ifindex, &devmap_val, 0);
+               if (ret) {
+                       perror("bpf_map_update_elem forward_map");
+                       goto err_out;
+               }
+       }
+
+       poll_stats(2);
+
+       return 0;
+
+err_out:
+       return 1;
+}
index 706475e..495e098 100644 (file)
@@ -103,7 +103,8 @@ static void usage(const char *prog)
        fprintf(stderr,
                "%s: %s [OPTS] <ifname|ifindex>\n\n"
                "OPTS:\n"
-               "    -F    force loading prog\n",
+               "    -F    force loading prog\n"
+               "    -S    use skb-mode\n",
                __func__, prog);
 }
 
index d16d289..d73232b 100644 (file)
@@ -136,7 +136,7 @@ endif
 
 BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
 
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o) $(OUTPUT)disasm.o
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o)
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)                           \
@@ -180,6 +180,9 @@ endif
 
 CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
 
+$(BOOTSTRAP_OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
+       $(QUIET_CC)$(HOSTCC) $(CFLAGS) -c -MMD -o $@ $<
+
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
        $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
index 13b0aa7..1d71ff8 100644 (file)
@@ -713,6 +713,7 @@ static int do_skeleton(int argc, char **argv)
                #ifndef %2$s                                                \n\
                #define %2$s                                                \n\
                                                                            \n\
+               #include <errno.h>                                          \n\
                #include <stdlib.h>                                         \n\
                #include <bpf/libbpf.h>                                     \n\
                                                                            \n\
@@ -793,18 +794,23 @@ static int do_skeleton(int argc, char **argv)
                %1$s__open_opts(const struct bpf_object_open_opts *opts)    \n\
                {                                                           \n\
                        struct %1$s *obj;                                   \n\
+                       int err;                                            \n\
                                                                            \n\
                        obj = (struct %1$s *)calloc(1, sizeof(*obj));       \n\
-                       if (!obj)                                           \n\
+                       if (!obj) {                                         \n\
+                               errno = ENOMEM;                             \n\
                                return NULL;                                \n\
-                       if (%1$s__create_skeleton(obj))                     \n\
-                               goto err;                                   \n\
-                       if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\
-                               goto err;                                   \n\
+                       }                                                   \n\
+                                                                           \n\
+                       err = %1$s__create_skeleton(obj);                   \n\
+                       err = err ?: bpf_object__open_skeleton(obj->skeleton, opts);\n\
+                       if (err)                                            \n\
+                               goto err_out;                               \n\
                                                                            \n\
                        return obj;                                         \n\
-               err:                                                        \n\
+               err_out:                                                    \n\
                        %1$s__destroy(obj);                                 \n\
+                       errno = -err;                                       \n\
                        return NULL;                                        \n\
                }                                                           \n\
                                                                            \n\
@@ -824,12 +830,15 @@ static int do_skeleton(int argc, char **argv)
                %1$s__open_and_load(void)                                   \n\
                {                                                           \n\
                        struct %1$s *obj;                                   \n\
+                       int err;                                            \n\
                                                                            \n\
                        obj = %1$s__open();                                 \n\
                        if (!obj)                                           \n\
                                return NULL;                                \n\
-                       if (%1$s__load(obj)) {                              \n\
+                       err = %1$s__load(obj);                              \n\
+                       if (err) {                                          \n\
                                %1$s__destroy(obj);                         \n\
+                               errno = -err;                               \n\
                                return NULL;                                \n\
                        }                                                   \n\
                        return obj;                                         \n\
@@ -860,7 +869,7 @@ static int do_skeleton(int argc, char **argv)
                                                                            \n\
                        s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
                        if (!s)                                             \n\
-                               return -1;                                  \n\
+                               goto err;                                   \n\
                        obj->skeleton = s;                                  \n\
                                                                            \n\
                        s->sz = sizeof(*s);                                 \n\
@@ -949,7 +958,7 @@ static int do_skeleton(int argc, char **argv)
                        return 0;                                           \n\
                err:                                                        \n\
                        bpf_object__destroy_skeleton(s);                    \n\
-                       return -1;                                          \n\
+                       return -ENOMEM;                                     \n\
                }                                                           \n\
                                                                            \n\
                #endif /* %s */                                             \n\
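
With this change the generated open/load helpers report the cause of failure through errno instead of only returning NULL. A calling sketch, with "myprog" standing in for a hypothetical skeleton name:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include "myprog.skel.h"	/* hypothetical generated skeleton */

int main(void)
{
	struct myprog *skel;

	skel = myprog__open_and_load();
	if (!skel) {
		/* the skeleton now stores -err in errno before returning NULL */
		fprintf(stderr, "open/load failed: %s\n", strerror(errno));
		return 1;
	}
	myprog__destroy(skel);
	return 0;
}
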
index 7f2817d..3ddfd48 100644 (file)
@@ -341,8 +341,10 @@ static int do_batch(int argc, char **argv)
                n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
                if (!n_argc)
                        continue;
-               if (n_argc < 0)
+               if (n_argc < 0) {
+                       err = n_argc;
                        goto err_close;
+               }
 
                if (json_output) {
                        jsonw_start_object(json_wtr);
index 418b9b8..bf9252c 100644 (file)
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
  *             Look up an element with the given *key* in the map referred to
  *             by the file descriptor *fd*, and if found, delete the element.
  *
+ *             For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ *             types, the *flags* argument needs to be set to 0, but for other
+ *             map types, it may be specified as:
+ *
+ *             **BPF_F_LOCK**
+ *                     Look up and delete the value of a spin-locked map
+ *                     without returning the lock. This must be specified if
+ *                     the elements contain a spinlock.
+ *
  *             The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
  *             implement this command as a "pop" operation, deleting the top
  *             element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
  *             This command is only valid for the following map types:
  *             * **BPF_MAP_TYPE_QUEUE**
  *             * **BPF_MAP_TYPE_STACK**
+ *             * **BPF_MAP_TYPE_HASH**
+ *             * **BPF_MAP_TYPE_PERCPU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_HASH**
+ *             * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
  *
  *     Return
  *             Returns zero on success. On error, -1 is returned and *errno*
@@ -981,6 +994,8 @@ enum bpf_attach_type {
        BPF_SK_LOOKUP,
        BPF_XDP,
        BPF_SK_SKB_VERDICT,
+       BPF_SK_REUSEPORT_SELECT,
+       BPF_SK_REUSEPORT_SELECT_OR_MIGRATE,
        __MAX_BPF_ATTACH_TYPE
 };
 
@@ -2542,8 +2557,12 @@ union bpf_attr {
  *             The lower two bits of *flags* are used as the return code if
  *             the map lookup fails. This is so that the return value can be
  *             one of the XDP program return codes up to **XDP_TX**, as chosen
- *             by the caller. Any higher bits in the *flags* argument must be
- *             unset.
+ *             by the caller. The higher bits of *flags* can be set to
+ *             **BPF_F_BROADCAST** or **BPF_F_EXCLUDE_INGRESS** as defined below.
+ *
+ *             With **BPF_F_BROADCAST** the packet will be broadcast to all
+ *             interfaces in the map; with **BPF_F_EXCLUDE_INGRESS** the
+ *             ingress interface will be excluded from the broadcast.
  *
  *             See also **bpf_redirect**\ (), which only supports redirecting
  *             to an ifindex, but doesn't require a map to do so.
@@ -5109,6 +5128,12 @@ enum {
        BPF_F_BPRM_SECUREEXEC   = (1ULL << 0),
 };
 
+/* Flags for bpf_redirect_map helper */
+enum {
+       BPF_F_BROADCAST         = (1ULL << 3),
+       BPF_F_EXCLUDE_INGRESS   = (1ULL << 4),
+};
+
 #define __bpf_md_ptr(type, name)       \
 union {                                        \
        type name;                      \
@@ -5393,6 +5418,20 @@ struct sk_reuseport_md {
        __u32 ip_protocol;      /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
        __u32 bind_inany;       /* Is sock bound to an INANY address? */
        __u32 hash;             /* A hash of the packet 4 tuples */
+       /* When reuse->migrating_sk is NULL, it is selecting a sk for the
+        * new incoming connection request (e.g. selecting a listen sk for
+        * the received SYN in the TCP case).  reuse->sk is one of the
+        * sockets in the reuseport group. The bpf prog can use reuse->sk
+        * to learn the local listening ip/port without looking into the skb.
+        *
+        * When reuse->migrating_sk is not NULL, reuse->sk is closed and
+        * reuse->migrating_sk is the socket that needs to be migrated
+        * to another listening socket.  migrating_sk could be a fullsock
+        * sk that is fully established or a reqsk that is in the middle
+        * of the 3-way handshake.
+        */
+       __bpf_md_ptr(struct bpf_sock *, sk);
+       __bpf_md_ptr(struct bpf_sock *, migrating_sk);
 };
 
 #define BPF_TAG_SIZE   8
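
A minimal BPF program sketch for the new attach types, assuming the "sk_reuseport/migrate" section convention libbpf maps to BPF_SK_REUSEPORT_SELECT_OR_MIGRATE (map name and the slot-0 policy are illustrative):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} migrate_map SEC(".maps");

SEC("sk_reuseport/migrate")
int select_or_migrate(struct sk_reuseport_md *reuse_md)
{
	__u32 key = 0;

	/* migrating_sk == NULL: ordinary SYN, keep the default selection */
	if (!reuse_md->migrating_sk)
		return SK_PASS;

	/* migrating_sk != NULL: the listener (reuse_md->sk) is closing;
	 * pick the replacement listener stored at slot 0.
	 */
	if (bpf_sk_select_reuseport(reuse_md, &migrate_map, &key, 0))
		return SK_DROP;

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";
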
index e43e189..ec14aa7 100644 (file)
@@ -223,18 +223,14 @@ install_lib: all_cmd
                $(call do_install_mkdir,$(libdir_SQ)); \
                cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
 
+INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
+                 bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h             \
+                 bpf_endian.h bpf_core_read.h skel_internal.h
+
 install_headers: $(BPF_HELPER_DEFS)
-       $(call QUIET_INSTALL, headers) \
-               $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
-               $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
-               $(call do_install,btf.h,$(prefix)/include/bpf,644); \
-               $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
-               $(call do_install,xsk.h,$(prefix)/include/bpf,644); \
-               $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
-               $(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \
-               $(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \
-               $(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \
-               $(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);
+       $(call QUIET_INSTALL, headers)                                       \
+               $(foreach hdr,$(INSTALL_HEADERS),                            \
+                       $(call do_install,$(hdr),$(prefix)/include/bpf,644);)
 
 install_pkgconfig: $(PC_FILE)
        $(call QUIET_INSTALL, $(PC_FILE)) \
index bba48ff..86dcac4 100644 (file)
@@ -80,6 +80,7 @@ static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
 int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, '\0', sizeof(attr));
 
@@ -102,7 +103,8 @@ int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
        else
                attr.inner_map_fd = create_attr->inner_map_fd;
 
-       return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
@@ -160,6 +162,7 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
                               __u32 map_flags, int node)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, '\0', sizeof(attr));
 
@@ -178,7 +181,8 @@ int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
                attr.numa_node = node;
        }
 
-       return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
@@ -222,10 +226,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
        int fd;
 
        if (!load_attr->log_buf != !load_attr->log_buf_sz)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.prog_type = load_attr->prog_type;
@@ -281,8 +285,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
                                                        load_attr->func_info_cnt,
                                                        load_attr->func_info_rec_size,
                                                        attr.func_info_rec_size);
-                       if (!finfo)
+                       if (!finfo) {
+                               errno = E2BIG;
                                goto done;
+                       }
 
                        attr.func_info = ptr_to_u64(finfo);
                        attr.func_info_rec_size = load_attr->func_info_rec_size;
@@ -293,8 +299,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
                                                        load_attr->line_info_cnt,
                                                        load_attr->line_info_rec_size,
                                                        attr.line_info_rec_size);
-                       if (!linfo)
+                       if (!linfo) {
+                               errno = E2BIG;
                                goto done;
+                       }
 
                        attr.line_info = ptr_to_u64(linfo);
                        attr.line_info_rec_size = load_attr->line_info_rec_size;
@@ -318,9 +326,10 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)
 
        fd = sys_bpf_prog_load(&attr, sizeof(attr));
 done:
+       /* free() doesn't affect errno, so we don't need to restore it */
        free(finfo);
        free(linfo);
-       return fd;
+       return libbpf_err_errno(fd);
 }
 
 int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
@@ -329,7 +338,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
        struct bpf_prog_load_params p = {};
 
        if (!load_attr || !log_buf != !log_buf_sz)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        p.prog_type = load_attr->prog_type;
        p.expected_attach_type = load_attr->expected_attach_type;
@@ -391,6 +400,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
                       int log_level)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.prog_type = type;
@@ -404,13 +414,15 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
        attr.kern_version = kern_version;
        attr.prog_flags = prog_flags;
 
-       return sys_bpf_prog_load(&attr, sizeof(attr));
+       fd = sys_bpf_prog_load(&attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_map_update_elem(int fd, const void *key, const void *value,
                        __u64 flags)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
@@ -418,24 +430,28 @@ int bpf_map_update_elem(int fd, const void *key, const void *value,
        attr.value = ptr_to_u64(value);
        attr.flags = flags;
 
-       return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.value = ptr_to_u64(value);
 
-       return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
@@ -443,17 +459,33 @@ int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
        attr.value = ptr_to_u64(value);
        attr.flags = flags;
 
-       return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 {
        union bpf_attr attr;
+       int ret;
+
+       memset(&attr, 0, sizeof(attr));
+       attr.map_fd = fd;
+       attr.key = ptr_to_u64(key);
+       attr.value = ptr_to_u64(value);
+
+       ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
+}
+
+int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
+{
+       union bpf_attr attr;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.value = ptr_to_u64(value);
+       attr.flags = flags;
 
        return sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
 }
@@ -461,34 +493,40 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 int bpf_map_delete_elem(int fd, const void *key)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
 
-       return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_get_next_key(int fd, const void *key, void *next_key)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
        attr.key = ptr_to_u64(key);
        attr.next_key = ptr_to_u64(next_key);
 
-       return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_freeze(int fd)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_fd = fd;
 
-       return sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
@@ -500,7 +538,7 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
        int ret;
 
        if (!OPTS_VALID(opts, bpf_map_batch_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.batch.map_fd = fd;
@@ -515,7 +553,7 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
        ret = sys_bpf(cmd, &attr, sizeof(attr));
        *count = attr.batch.count;
 
-       return ret;
+       return libbpf_err_errno(ret);
 }
 
 int bpf_map_delete_batch(int fd, void *keys, __u32 *count,
@@ -552,22 +590,26 @@ int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count,
 int bpf_obj_pin(int fd, const char *pathname)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.pathname = ptr_to_u64((void *)pathname);
        attr.bpf_fd = fd;
 
-       return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_obj_get(const char *pathname)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.pathname = ptr_to_u64((void *)pathname);
 
-       return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
@@ -585,9 +627,10 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
                          const struct bpf_prog_attach_opts *opts)
 {
        union bpf_attr attr;
+       int ret;
 
        if (!OPTS_VALID(opts, bpf_prog_attach_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.target_fd     = target_fd;
@@ -596,30 +639,35 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
        attr.attach_flags  = OPTS_GET(opts, flags, 0);
        attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
 
-       return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.target_fd   = target_fd;
        attr.attach_type = type;
 
-       return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.target_fd   = target_fd;
        attr.attach_bpf_fd = prog_fd;
        attr.attach_type = type;
 
-       return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_link_create(int prog_fd, int target_fd,
@@ -628,15 +676,16 @@ int bpf_link_create(int prog_fd, int target_fd,
 {
        __u32 target_btf_id, iter_info_len;
        union bpf_attr attr;
+       int fd;
 
        if (!OPTS_VALID(opts, bpf_link_create_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        iter_info_len = OPTS_GET(opts, iter_info_len, 0);
        target_btf_id = OPTS_GET(opts, target_btf_id, 0);
 
        if (iter_info_len && target_btf_id)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.link_create.prog_fd = prog_fd;
@@ -652,26 +701,30 @@ int bpf_link_create(int prog_fd, int target_fd,
                attr.link_create.target_btf_id = target_btf_id;
        }
 
-       return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_link_detach(int link_fd)
 {
        union bpf_attr attr;
+       int ret;
 
        memset(&attr, 0, sizeof(attr));
        attr.link_detach.link_fd = link_fd;
 
-       return sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_link_update(int link_fd, int new_prog_fd,
                    const struct bpf_link_update_opts *opts)
 {
        union bpf_attr attr;
+       int ret;
 
        if (!OPTS_VALID(opts, bpf_link_update_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.link_update.link_fd = link_fd;
@@ -679,17 +732,20 @@ int bpf_link_update(int link_fd, int new_prog_fd,
        attr.link_update.flags = OPTS_GET(opts, flags, 0);
        attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
 
-       return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
 
 int bpf_iter_create(int link_fd)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.iter_create.link_fd = link_fd;
 
-       return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
@@ -706,10 +762,12 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
        attr.query.prog_ids     = ptr_to_u64(prog_ids);
 
        ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+
        if (attach_flags)
                *attach_flags = attr.query.attach_flags;
        *prog_cnt = attr.query.prog_cnt;
-       return ret;
+
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
@@ -727,13 +785,15 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
        attr.test.repeat = repeat;
 
        ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
        if (size_out)
                *size_out = attr.test.data_size_out;
        if (retval)
                *retval = attr.test.retval;
        if (duration)
                *duration = attr.test.duration;
-       return ret;
+
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
@@ -742,7 +802,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
        int ret;
 
        if (!test_attr->data_out && test_attr->data_size_out > 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd = test_attr->prog_fd;
@@ -757,11 +817,13 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr)
        attr.test.repeat = test_attr->repeat;
 
        ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
        test_attr->data_size_out = attr.test.data_size_out;
        test_attr->ctx_size_out = attr.test.ctx_size_out;
        test_attr->retval = attr.test.retval;
        test_attr->duration = attr.test.duration;
-       return ret;
+
+       return libbpf_err_errno(ret);
 }
 
 int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
@@ -770,7 +832,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
        int ret;
 
        if (!OPTS_VALID(opts, bpf_test_run_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.test.prog_fd = prog_fd;
@@ -788,11 +850,13 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
        attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
 
        ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+
        OPTS_SET(opts, data_size_out, attr.test.data_size_out);
        OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
        OPTS_SET(opts, duration, attr.test.duration);
        OPTS_SET(opts, retval, attr.test.retval);
-       return ret;
+
+       return libbpf_err_errno(ret);
 }
 
 static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
@@ -807,7 +871,7 @@ static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
        if (!err)
                *next_id = attr.next_id;
 
-       return err;
+       return libbpf_err_errno(err);
 }
 
 int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
@@ -833,41 +897,49 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
 int bpf_prog_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.prog_id = id;
 
-       return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_map_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.map_id = id;
 
-       return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_btf_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.btf_id = id;
 
-       return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_link_get_fd_by_id(__u32 id)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.link_id = id;
 
-       return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
@@ -881,21 +953,24 @@ int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
        attr.info.info = ptr_to_u64(info);
 
        err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+
        if (!err)
                *info_len = attr.info.info_len;
 
-       return err;
+       return libbpf_err_errno(err);
 }
 
 int bpf_raw_tracepoint_open(const char *name, int prog_fd)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.raw_tracepoint.name = ptr_to_u64(name);
        attr.raw_tracepoint.prog_fd = prog_fd;
 
-       return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
@@ -915,12 +990,13 @@ retry:
        }
 
        fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
-       if (fd == -1 && !do_log && log_buf && log_buf_size) {
+
+       if (fd < 0 && !do_log && log_buf && log_buf_size) {
                do_log = true;
                goto retry;
        }
 
-       return fd;
+       return libbpf_err_errno(fd);
 }
 
 int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
@@ -937,37 +1013,42 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
        attr.task_fd_query.buf_len = *buf_len;
 
        err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+
        *buf_len = attr.task_fd_query.buf_len;
        *prog_id = attr.task_fd_query.prog_id;
        *fd_type = attr.task_fd_query.fd_type;
        *probe_offset = attr.task_fd_query.probe_offset;
        *probe_addr = attr.task_fd_query.probe_addr;
 
-       return err;
+       return libbpf_err_errno(err);
 }
 
 int bpf_enable_stats(enum bpf_stats_type type)
 {
        union bpf_attr attr;
+       int fd;
 
        memset(&attr, 0, sizeof(attr));
        attr.enable_stats.type = type;
 
-       return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+       fd = sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+       return libbpf_err_errno(fd);
 }
 
 int bpf_prog_bind_map(int prog_fd, int map_fd,
                      const struct bpf_prog_bind_opts *opts)
 {
        union bpf_attr attr;
+       int ret;
 
        if (!OPTS_VALID(opts, bpf_prog_bind_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memset(&attr, 0, sizeof(attr));
        attr.prog_bind_map.prog_fd = prog_fd;
        attr.prog_bind_map.map_fd = map_fd;
        attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
 
-       return sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+       ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+       return libbpf_err_errno(ret);
 }
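
After this conversion every wrapper both returns a negative result on failure and leaves the error code in errno, so -1/errno callers and direct-error callers keep working. A calling sketch:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <bpf/bpf.h>

static int get_prog_fd(__u32 id)
{
	int fd = bpf_prog_get_fd_by_id(id);

	if (fd < 0)	/* the error code is also available in errno */
		fprintf(stderr, "prog fd by id %u: %s\n", id, strerror(errno));
	return fd;
}
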
index 875dde2..4f758f8 100644 (file)
@@ -124,6 +124,8 @@ LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
                                         __u64 flags);
 LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
                                              void *value);
+LIBBPF_API int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key,
+                                                   void *value, __u64 flags);
 LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);
 LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
 LIBBPF_API int bpf_map_freeze(int fd);
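
The new _flags variant complements the map types just added to BPF_MAP_LOOKUP_AND_DELETE_ELEM. A userspace sketch that atomically pops one element from a plain BPF_MAP_TYPE_HASH (map fd and key are illustrative; *flags* stays 0 unless the value embeds a bpf_spin_lock, in which case BPF_F_LOCK is required):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <bpf/bpf.h>

/* assumes map_fd refers to a BPF_MAP_TYPE_HASH with __u32 key/value */
static int pop_entry(int map_fd, __u32 key)
{
	__u32 value;

	/* read and remove the element in one syscall */
	if (bpf_map_lookup_and_delete_elem_flags(map_fd, &key, &value, 0) < 0) {
		fprintf(stderr, "lookup_and_delete: %s\n", strerror(errno));
		return -1;
	}
	printf("key %u had value %u\n", key, value);
	return 0;
}
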
index 9720dc0..b9987c3 100644 (file)
@@ -158,4 +158,70 @@ enum libbpf_tristate {
 #define __kconfig __attribute__((section(".kconfig")))
 #define __ksym __attribute__((section(".ksyms")))
 
+#ifndef ___bpf_concat
+#define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
+#define ___bpf_narg(...) \
+       ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+       ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
+/*
+ * BPF_SEQ_PRINTF wraps the bpf_seq_printf helper, packing the
+ * to-be-printed values into an on-stack array.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...)                      \
+({                                                             \
+       static const char ___fmt[] = fmt;                       \
+       unsigned long long ___param[___bpf_narg(args)];         \
+                                                               \
+       _Pragma("GCC diagnostic push")                          \
+       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")  \
+       ___bpf_fill(___param, args);                            \
+       _Pragma("GCC diagnostic pop")                           \
+                                                               \
+       bpf_seq_printf(seq, ___fmt, sizeof(___fmt),             \
+                      ___param, sizeof(___param));             \
+})
+
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...)              \
+({                                                             \
+       static const char ___fmt[] = fmt;                       \
+       unsigned long long ___param[___bpf_narg(args)];         \
+                                                               \
+       _Pragma("GCC diagnostic push")                          \
+       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")  \
+       ___bpf_fill(___param, args);                            \
+       _Pragma("GCC diagnostic pop")                           \
+                                                               \
+       bpf_snprintf(out, out_size, ___fmt,                     \
+                    ___param, sizeof(___param));               \
+})
+
 #endif
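
With the macros relocated to bpf_helpers.h, any BPF program can use them, not just tracing ones. A tracepoint sketch (section name and message are illustrative):

// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tracepoint/syscalls/sys_enter_execve")
int log_execve(void *ctx)
{
	char msg[64];

	/* BPF_SNPRINTF packs the variadic args into a u64 array and
	 * invokes the bpf_snprintf() helper behind the scenes.
	 */
	BPF_SNPRINTF(msg, sizeof(msg), "execve by pid %d",
		     (int)(bpf_get_current_pid_tgid() >> 32));
	bpf_printk("%s", msg);
	return 0;
}

char _license[] SEC("license") = "GPL";
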
index 3ed1a27..5c50309 100644 (file)
@@ -106,7 +106,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
        nr_linfo = info->nr_line_info;
 
        if (!nr_linfo)
-               return NULL;
+               return errno = EINVAL, NULL;
 
        /*
         * The min size that bpf_prog_linfo has to access for
@@ -114,11 +114,11 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
         */
        if (info->line_info_rec_size <
            offsetof(struct bpf_line_info, file_name_off))
-               return NULL;
+               return errno = EINVAL, NULL;
 
        prog_linfo = calloc(1, sizeof(*prog_linfo));
        if (!prog_linfo)
-               return NULL;
+               return errno = ENOMEM, NULL;
 
        /* Copy xlated line_info */
        prog_linfo->nr_linfo = nr_linfo;
@@ -174,7 +174,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)
 
 err_free:
        bpf_prog_linfo__free(prog_linfo);
-       return NULL;
+       return errno = EINVAL, NULL;
 }
 
 const struct bpf_line_info *
@@ -186,11 +186,11 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
        const __u64 *jited_linfo;
 
        if (func_idx >= prog_linfo->nr_jited_func)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        nr_linfo = prog_linfo->nr_jited_linfo_per_func[func_idx];
        if (nr_skip >= nr_linfo)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        start = prog_linfo->jited_linfo_func_idx[func_idx] + nr_skip;
        jited_rec_size = prog_linfo->jited_rec_size;
@@ -198,7 +198,7 @@ bpf_prog_linfo__lfind_addr_func(const struct bpf_prog_linfo *prog_linfo,
                (start * jited_rec_size);
        jited_linfo = raw_jited_linfo;
        if (addr < *jited_linfo)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        nr_linfo -= nr_skip;
        rec_size = prog_linfo->rec_size;
@@ -225,13 +225,13 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,
 
        nr_linfo = prog_linfo->nr_linfo;
        if (nr_skip >= nr_linfo)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        rec_size = prog_linfo->rec_size;
        raw_linfo = prog_linfo->raw_linfo + (nr_skip * rec_size);
        linfo = raw_linfo;
        if (insn_off < linfo->insn_off)
-               return NULL;
+               return errno = ENOENT, NULL;
 
        nr_linfo -= nr_skip;
        for (i = 0; i < nr_linfo; i++) {
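
Since the lookup functions now set errno on every NULL return, callers can tell a missing record (ENOENT) apart from invalid input (EINVAL). A usage sketch (find_line() is hypothetical; prog_linfo comes from bpf_prog_linfo__new()):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <bpf/libbpf.h>

    static void find_line(const struct bpf_prog_linfo *prog_linfo, __u32 insn_off)
    {
            const struct bpf_line_info *linfo;

            linfo = bpf_prog_linfo__lfind(prog_linfo, insn_off, 0);
            if (!linfo)
                    /* ENOENT: no matching record; EINVAL: bad query */
                    fprintf(stderr, "lfind: %s\n", strerror(errno));
    }
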
index 8c954eb..d6bfbe0 100644 (file)
        #define bpf_target_sparc
        #define bpf_target_defined
 #else
-       #undef bpf_target_defined
-#endif
 
 /* Fall back to what the compiler says */
-#ifndef bpf_target_defined
 #if defined(__x86_64__)
        #define bpf_target_x86
+       #define bpf_target_defined
 #elif defined(__s390__)
        #define bpf_target_s390
+       #define bpf_target_defined
 #elif defined(__arm__)
        #define bpf_target_arm
+       #define bpf_target_defined
 #elif defined(__aarch64__)
        #define bpf_target_arm64
+       #define bpf_target_defined
 #elif defined(__mips__)
        #define bpf_target_mips
+       #define bpf_target_defined
 #elif defined(__powerpc__)
        #define bpf_target_powerpc
+       #define bpf_target_defined
 #elif defined(__sparc__)
        #define bpf_target_sparc
+       #define bpf_target_defined
+#endif /* no compiler target */
+
 #endif
+
+#ifndef __BPF_TARGET_MISSING
+#define __BPF_TARGET_MISSING "GCC error \"Must specify a BPF target arch via __TARGET_ARCH_xxx\""
 #endif
 
 #if defined(bpf_target_x86)
@@ -287,7 +296,7 @@ struct pt_regs;
 #elif defined(bpf_target_sparc)
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)                ({ (ip) = PT_REGS_RET(ctx); })
 #define BPF_KRETPROBE_READ_RET_IP              BPF_KPROBE_READ_RET_IP
-#else
+#elif defined(bpf_target_defined)
 #define BPF_KPROBE_READ_RET_IP(ip, ctx)                                            \
        ({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
 #define BPF_KRETPROBE_READ_RET_IP(ip, ctx)                                 \
@@ -295,13 +304,48 @@ struct pt_regs;
                          (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
 #endif
 
+#if !defined(bpf_target_defined)
+
+#define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define PT_REGS_PARM1_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM2_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_SP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define PT_REGS_IP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
+
+#endif /* !defined(bpf_target_defined) */
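
These dummies turn a previously silent miscompilation into a hard failure: with no detectable target architecture, any expansion of the PT_REGS macros trips the _Pragma(GCC error ...) above. A sketch of a program that only builds once an arch is given explicitly (e.g. -D__TARGET_ARCH_x86); do_unlinkat's signature is taken from the kernel of this era:

    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>
    #include <bpf/bpf_tracing.h>

    SEC("kprobe/do_unlinkat")
    int BPF_KPROBE(handle_unlinkat, int dfd, struct filename *name)
    {
            /* BPF_KPROBE expands to PT_REGS_PARM1()/PT_REGS_PARM2(); without
             * a target arch the build stops with:
             *   "Must specify a BPF target arch via __TARGET_ARCH_xxx"
             */
            return 0;
    }
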
+
+#ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
+#endif
+#ifndef ___bpf_apply
 #define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#endif
+#ifndef ___bpf_nth
 #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#endif
+#ifndef ___bpf_narg
 #define ___bpf_narg(...) \
        ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define ___bpf_empty(...) \
-       ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+#endif
 
 #define ___bpf_ctx_cast0() ctx
 #define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
@@ -413,56 +457,4 @@ typeof(name(0)) name(struct pt_regs *ctx)                              \
 }                                                                          \
 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
 
-#define ___bpf_fill0(arr, p, x) do {} while (0)
-#define ___bpf_fill1(arr, p, x) arr[p] = x
-#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
-#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
-#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
-#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
-#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
-#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
-#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
-#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
-#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
-#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
-#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
-#define ___bpf_fill(arr, args...) \
-       ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
-
-/*
- * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
- * in a structure.
- */
-#define BPF_SEQ_PRINTF(seq, fmt, args...)                      \
-({                                                             \
-       static const char ___fmt[] = fmt;                       \
-       unsigned long long ___param[___bpf_narg(args)];         \
-                                                               \
-       _Pragma("GCC diagnostic push")                          \
-       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")  \
-       ___bpf_fill(___param, args);                            \
-       _Pragma("GCC diagnostic pop")                           \
-                                                               \
-       bpf_seq_printf(seq, ___fmt, sizeof(___fmt),             \
-                      ___param, sizeof(___param));             \
-})
-
-/*
- * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
- * an array of u64.
- */
-#define BPF_SNPRINTF(out, out_size, fmt, args...)              \
-({                                                             \
-       static const char ___fmt[] = fmt;                       \
-       unsigned long long ___param[___bpf_narg(args)];         \
-                                                               \
-       _Pragma("GCC diagnostic push")                          \
-       _Pragma("GCC diagnostic ignored \"-Wint-conversion\"")  \
-       ___bpf_fill(___param, args);                            \
-       _Pragma("GCC diagnostic pop")                           \
-                                                               \
-       bpf_snprintf(out, out_size, ___fmt,                     \
-                    ___param, sizeof(___param));               \
-})
-
 #endif
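
The #ifndef guards added above let bpf_helpers.h and bpf_tracing.h be included in either order now that both need the argument-counting helpers (the unused ___bpf_empty() is dropped). How the counting works, as a compile-time sketch:

    /* ___bpf_narg(x, y, z) expands to
     *   ___bpf_nth(_, x, y, z, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
     * the three user args shift the numeric list so that ___bpf_nth()'s
     * 14th parameter, N, lands on the literal 3. */
    _Static_assert(___bpf_narg() == 0, "zero args");
    _Static_assert(___bpf_narg(x) == 1, "one arg");
    _Static_assert(___bpf_narg(x, y, z) == 3, "three args");
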
index d57e13a..b46760b 100644 (file)
@@ -443,7 +443,7 @@ struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
 const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
 {
        if (type_id >= btf->start_id + btf->nr_types)
-               return NULL;
+               return errno = EINVAL, NULL;
        return btf_type_by_id((struct btf *)btf, type_id);
 }
 
@@ -510,7 +510,7 @@ size_t btf__pointer_size(const struct btf *btf)
 int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
 {
        if (ptr_sz != 4 && ptr_sz != 8)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        btf->ptr_sz = ptr_sz;
        return 0;
 }
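
The integer-returning conversions in this file all funnel through the new libbpf_err() helper, which mirrors the error into errno before returning it, so return-code and errno-based callers agree. Roughly, per libbpf_internal.h in this series:

    static inline int libbpf_err(int err)
    {
            /* record the error for errno-based callers, then pass it through */
            if (err < 0)
                    errno = -err;
            return err;
    }
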
@@ -537,7 +537,7 @@ enum btf_endianness btf__endianness(const struct btf *btf)
 int btf__set_endianness(struct btf *btf, enum btf_endianness endian)
 {
        if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        btf->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN);
        if (!btf->swapped_endian) {
@@ -568,8 +568,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
        int i;
 
        t = btf__type_by_id(btf, type_id);
-       for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
-            i++) {
+       for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) {
                switch (btf_kind(t)) {
                case BTF_KIND_INT:
                case BTF_KIND_STRUCT:
@@ -592,12 +591,12 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
                case BTF_KIND_ARRAY:
                        array = btf_array(t);
                        if (nelems && array->nelems > UINT32_MAX / nelems)
-                               return -E2BIG;
+                               return libbpf_err(-E2BIG);
                        nelems *= array->nelems;
                        type_id = array->type;
                        break;
                default:
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                }
 
                t = btf__type_by_id(btf, type_id);
@@ -605,9 +604,9 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 
 done:
        if (size < 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (nelems && size > UINT32_MAX / nelems)
-               return -E2BIG;
+               return libbpf_err(-E2BIG);
 
        return nelems * size;
 }
@@ -640,7 +639,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
                for (i = 0; i < vlen; i++, m++) {
                        align = btf__align_of(btf, m->type);
                        if (align <= 0)
-                               return align;
+                               return libbpf_err(align);
                        max_align = max(max_align, align);
                }
 
@@ -648,7 +647,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
        }
        default:
                pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));
-               return 0;
+               return errno = EINVAL, 0;
        }
 }
 
@@ -667,7 +666,7 @@ int btf__resolve_type(const struct btf *btf, __u32 type_id)
        }
 
        if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        return type_id;
 }
@@ -687,7 +686,7 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)
                        return i;
        }
 
-       return -ENOENT;
+       return libbpf_err(-ENOENT);
 }
 
 __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
@@ -709,7 +708,7 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
                        return i;
        }
 
-       return -ENOENT;
+       return libbpf_err(-ENOENT);
 }
 
 static bool btf_is_modifiable(const struct btf *btf)
@@ -785,12 +784,12 @@ static struct btf *btf_new_empty(struct btf *base_btf)
 
 struct btf *btf__new_empty(void)
 {
-       return btf_new_empty(NULL);
+       return libbpf_ptr(btf_new_empty(NULL));
 }
 
 struct btf *btf__new_empty_split(struct btf *base_btf)
 {
-       return btf_new_empty(base_btf);
+       return libbpf_ptr(btf_new_empty(base_btf));
 }
 
 static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf)
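
Pointer-returning APIs use two companions: libbpf_err_ptr() for a raw error code and libbpf_ptr() for a possibly ERR_PTR()-encoded value. Sketched from the same series (behavior depends on the strict mode introduced in libbpf.c further below):

    static inline void *libbpf_err_ptr(int err)
    {
            /* set errno on error, this doesn't break anything */
            errno = -err;

            if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
                    return NULL;            /* libbpf 1.0 behavior */
            return ERR_PTR(err);            /* legacy: encode err as ptr */
    }

    static inline void *libbpf_ptr(void *ret)
    {
            if (IS_ERR(ret))
                    errno = -PTR_ERR(ret);

            if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
                    return IS_ERR(ret) ? NULL : ret;
            return ret;
    }
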
@@ -846,7 +845,7 @@ done:
 
 struct btf *btf__new(const void *data, __u32 size)
 {
-       return btf_new(data, size, NULL);
+       return libbpf_ptr(btf_new(data, size, NULL));
 }
 
 static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
@@ -937,7 +936,8 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
                goto done;
        }
        btf = btf_new(btf_data->d_buf, btf_data->d_size, base_btf);
-       if (IS_ERR(btf))
+       err = libbpf_get_error(btf);
+       if (err)
                goto done;
 
        switch (gelf_getclass(elf)) {
@@ -953,9 +953,9 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf,
        }
 
        if (btf_ext && btf_ext_data) {
-               *btf_ext = btf_ext__new(btf_ext_data->d_buf,
-                                       btf_ext_data->d_size);
-               if (IS_ERR(*btf_ext))
+               *btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+               err = libbpf_get_error(*btf_ext);
+               if (err)
                        goto done;
        } else if (btf_ext) {
                *btf_ext = NULL;
@@ -965,30 +965,24 @@ done:
                elf_end(elf);
        close(fd);
 
-       if (err)
-               return ERR_PTR(err);
-       /*
-        * btf is always parsed before btf_ext, so no need to clean up
-        * btf_ext, if btf loading failed
-        */
-       if (IS_ERR(btf))
+       if (!err)
                return btf;
-       if (btf_ext && IS_ERR(*btf_ext)) {
-               btf__free(btf);
-               err = PTR_ERR(*btf_ext);
-               return ERR_PTR(err);
-       }
-       return btf;
+
+       if (btf_ext)
+               btf_ext__free(*btf_ext);
+       btf__free(btf);
+
+       return ERR_PTR(err);
 }
 
 struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
 {
-       return btf_parse_elf(path, NULL, btf_ext);
+       return libbpf_ptr(btf_parse_elf(path, NULL, btf_ext));
 }
 
 struct btf *btf__parse_elf_split(const char *path, struct btf *base_btf)
 {
-       return btf_parse_elf(path, base_btf, NULL);
+       return libbpf_ptr(btf_parse_elf(path, base_btf, NULL));
 }
 
 static struct btf *btf_parse_raw(const char *path, struct btf *base_btf)
@@ -1056,36 +1050,39 @@ err_out:
 
 struct btf *btf__parse_raw(const char *path)
 {
-       return btf_parse_raw(path, NULL);
+       return libbpf_ptr(btf_parse_raw(path, NULL));
 }
 
 struct btf *btf__parse_raw_split(const char *path, struct btf *base_btf)
 {
-       return btf_parse_raw(path, base_btf);
+       return libbpf_ptr(btf_parse_raw(path, base_btf));
 }
 
 static struct btf *btf_parse(const char *path, struct btf *base_btf, struct btf_ext **btf_ext)
 {
        struct btf *btf;
+       int err;
 
        if (btf_ext)
                *btf_ext = NULL;
 
        btf = btf_parse_raw(path, base_btf);
-       if (!IS_ERR(btf) || PTR_ERR(btf) != -EPROTO)
+       err = libbpf_get_error(btf);
+       if (!err)
                return btf;
-
+       if (err != -EPROTO)
+               return ERR_PTR(err);
        return btf_parse_elf(path, base_btf, btf_ext);
 }
 
 struct btf *btf__parse(const char *path, struct btf_ext **btf_ext)
 {
-       return btf_parse(path, NULL, btf_ext);
+       return libbpf_ptr(btf_parse(path, NULL, btf_ext));
 }
 
 struct btf *btf__parse_split(const char *path, struct btf *base_btf)
 {
-       return btf_parse(path, base_btf, NULL);
+       return libbpf_ptr(btf_parse(path, base_btf, NULL));
 }
 
 static int compare_vsi_off(const void *_a, const void *_b)
@@ -1178,7 +1175,7 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
                }
        }
 
-       return err;
+       return libbpf_err(err);
 }
 
 static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian);
@@ -1191,13 +1188,13 @@ int btf__load(struct btf *btf)
        int err = 0;
 
        if (btf->fd >= 0)
-               return -EEXIST;
+               return libbpf_err(-EEXIST);
 
 retry_load:
        if (log_buf_size) {
                log_buf = malloc(log_buf_size);
                if (!log_buf)
-                       return -ENOMEM;
+                       return libbpf_err(-ENOMEM);
 
                *log_buf = 0;
        }
@@ -1229,7 +1226,7 @@ retry_load:
 
 done:
        free(log_buf);
-       return err;
+       return libbpf_err(err);
 }
 
 int btf__fd(const struct btf *btf)
@@ -1305,7 +1302,7 @@ const void *btf__get_raw_data(const struct btf *btf_ro, __u32 *size)
 
        data = btf_get_raw_data(btf, &data_sz, btf->swapped_endian);
        if (!data)
-               return NULL;
+               return errno = ENOMEM, NULL;
 
        btf->raw_size = data_sz;
        if (btf->swapped_endian)
@@ -1323,7 +1320,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
        else if (offset - btf->start_str_off < btf->hdr->str_len)
                return btf_strs_data(btf) + (offset - btf->start_str_off);
        else
-               return NULL;
+               return errno = EINVAL, NULL;
 }
 
 const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
@@ -1388,17 +1385,20 @@ exit_free:
 int btf__get_from_id(__u32 id, struct btf **btf)
 {
        struct btf *res;
-       int btf_fd;
+       int err, btf_fd;
 
        *btf = NULL;
        btf_fd = bpf_btf_get_fd_by_id(id);
        if (btf_fd < 0)
-               return -errno;
+               return libbpf_err(-errno);
 
        res = btf_get_from_fd(btf_fd, NULL);
+       err = libbpf_get_error(res);
+
        close(btf_fd);
-       if (IS_ERR(res))
-               return PTR_ERR(res);
+
+       if (err)
+               return libbpf_err(err);
 
        *btf = res;
        return 0;
@@ -1415,31 +1415,30 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
        __s64 key_size, value_size;
        __s32 container_id;
 
-       if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==
-           max_name) {
+       if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) {
                pr_warn("map:%s length of '____btf_map_%s' is too long\n",
                        map_name, map_name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        container_id = btf__find_by_name(btf, container_name);
        if (container_id < 0) {
                pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
                         map_name, container_name);
-               return container_id;
+               return libbpf_err(container_id);
        }
 
        container_type = btf__type_by_id(btf, container_id);
        if (!container_type) {
                pr_warn("map:%s cannot find BTF type for container_id:%u\n",
                        map_name, container_id);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) {
                pr_warn("map:%s container_name:%s is an invalid container struct\n",
                        map_name, container_name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        key = btf_members(container_type);
@@ -1448,25 +1447,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
        key_size = btf__resolve_size(btf, key->type);
        if (key_size < 0) {
                pr_warn("map:%s invalid BTF key_type_size\n", map_name);
-               return key_size;
+               return libbpf_err(key_size);
        }
 
        if (expected_key_size != key_size) {
                pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
                        map_name, (__u32)key_size, expected_key_size);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        value_size = btf__resolve_size(btf, value->type);
        if (value_size < 0) {
                pr_warn("map:%s invalid BTF value_type_size\n", map_name);
-               return value_size;
+               return libbpf_err(value_size);
        }
 
        if (expected_value_size != value_size) {
                pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
                        map_name, (__u32)value_size, expected_value_size);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        *key_type_id = key->type;
@@ -1563,11 +1562,11 @@ int btf__find_str(struct btf *btf, const char *s)
 
        /* BTF needs to be in a modifiable state to build string lookup index */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        off = strset__find_str(btf->strs_set, s);
        if (off < 0)
-               return off;
+               return libbpf_err(off);
 
        return btf->start_str_off + off;
 }
@@ -1588,11 +1587,11 @@ int btf__add_str(struct btf *btf, const char *s)
        }
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        off = strset__add_str(btf->strs_set, s);
        if (off < 0)
-               return off;
+               return libbpf_err(off);
 
        btf->hdr->str_len = strset__data_size(btf->strs_set);
 
@@ -1616,7 +1615,7 @@ static int btf_commit_type(struct btf *btf, int data_sz)
 
        err = btf_add_type_idx_entry(btf, btf->hdr->type_len);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        btf->hdr->type_len += data_sz;
        btf->hdr->str_off += data_sz;
@@ -1653,21 +1652,21 @@ int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_t
 
        sz = btf_type_size(src_type);
        if (sz < 0)
-               return sz;
+               return libbpf_err(sz);
 
        /* deconstruct BTF, if necessary, and invalidate raw_data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        memcpy(t, src_type, sz);
 
        err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        return btf_commit_type(btf, sz);
 }
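
Since every btf__add_*() entry point now goes through libbpf_err(), BTF construction code can use either error convention. A usage sketch (add_basic_types() is hypothetical):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <bpf/btf.h>

    static int add_basic_types(struct btf *btf)
    {
            int id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);

            if (id < 0) {
                    /* the return value and errno now agree */
                    fprintf(stderr, "btf__add_int: %d (%s)\n",
                            id, strerror(errno));
                    return id;
            }
            return 0;
    }
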
@@ -1688,21 +1687,21 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        /* byte_sz must be power of 2 */
        if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 16)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (encoding & ~(BTF_INT_SIGNED | BTF_INT_CHAR | BTF_INT_BOOL))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* deconstruct BTF, if necessary, and invalidate raw_data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type) + sizeof(int);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        /* if something goes wrong later, we might end up with an extra string,
         * but that shouldn't be a problem, because BTF can't be constructed
@@ -1736,20 +1735,20 @@ int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* byte_sz must be one of the explicitly allowed values */
        if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
            byte_sz != 16)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        name_off = btf__add_str(btf, name);
        if (name_off < 0)
@@ -1780,15 +1779,15 @@ static int btf_add_ref_kind(struct btf *btf, int kind, const char *name, int ref
        int sz, name_off = 0;
 
        if (validate_type_id(ref_type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -1831,15 +1830,15 @@ int btf__add_array(struct btf *btf, int index_type_id, int elem_type_id, __u32 n
        int sz;
 
        if (validate_type_id(index_type_id) || validate_type_id(elem_type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type) + sizeof(struct btf_array);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        t->name_off = 0;
        t->info = btf_type_info(BTF_KIND_ARRAY, 0, 0);
@@ -1860,12 +1859,12 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
        int sz, name_off = 0;
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -1943,30 +1942,30 @@ int btf__add_field(struct btf *btf, const char *name, int type_id,
 
        /* last type should be union/struct */
        if (btf->nr_types == 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        t = btf_last_type(btf);
        if (!btf_is_composite(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (validate_type_id(type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        /* best-effort bit field offset/size enforcement */
        is_bitfield = bit_size || (bit_offset % 8 != 0);
        if (is_bitfield && (bit_size == 0 || bit_size > 255 || bit_offset > 0xffffff))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* only offset 0 is allowed for unions */
        if (btf_is_union(t) && bit_offset)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* decompose and invalidate raw data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_member);
        m = btf_add_type_mem(btf, sz);
        if (!m)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -2008,15 +2007,15 @@ int btf__add_enum(struct btf *btf, const char *name, __u32 byte_sz)
 
        /* byte_sz must be power of 2 */
        if (!byte_sz || (byte_sz & (byte_sz - 1)) || byte_sz > 8)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -2048,25 +2047,25 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 
        /* last type should be BTF_KIND_ENUM */
        if (btf->nr_types == 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        t = btf_last_type(btf);
        if (!btf_is_enum(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (value < INT_MIN || value > UINT_MAX)
-               return -E2BIG;
+               return libbpf_err(-E2BIG);
 
        /* decompose and invalidate raw data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_enum);
        v = btf_add_type_mem(btf, sz);
        if (!v)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        name_off = btf__add_str(btf, name);
        if (name_off < 0)
@@ -2096,7 +2095,7 @@ int btf__add_enum_value(struct btf *btf, const char *name, __s64 value)
 int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
 {
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        switch (fwd_kind) {
        case BTF_FWD_STRUCT:
@@ -2117,7 +2116,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
                 */
                return btf__add_enum(btf, name, sizeof(int));
        default:
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 }
 
@@ -2132,7 +2131,7 @@ int btf__add_fwd(struct btf *btf, const char *name, enum btf_fwd_kind fwd_kind)
 int btf__add_typedef(struct btf *btf, const char *name, int ref_type_id)
 {
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        return btf_add_ref_kind(btf, BTF_KIND_TYPEDEF, name, ref_type_id);
 }
@@ -2187,10 +2186,10 @@ int btf__add_func(struct btf *btf, const char *name,
        int id;
 
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (linkage != BTF_FUNC_STATIC && linkage != BTF_FUNC_GLOBAL &&
            linkage != BTF_FUNC_EXTERN)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        id = btf_add_ref_kind(btf, BTF_KIND_FUNC, name, proto_type_id);
        if (id > 0) {
@@ -2198,7 +2197,7 @@ int btf__add_func(struct btf *btf, const char *name,
 
                t->info = btf_type_info(BTF_KIND_FUNC, linkage, 0);
        }
-       return id;
+       return libbpf_err(id);
 }
 
 /*
@@ -2219,15 +2218,15 @@ int btf__add_func_proto(struct btf *btf, int ret_type_id)
        int sz;
 
        if (validate_type_id(ret_type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        /* start out with vlen=0; this will be adjusted when adding function
         * parameters, if necessary
@@ -2254,23 +2253,23 @@ int btf__add_func_param(struct btf *btf, const char *name, int type_id)
        int sz, name_off = 0;
 
        if (validate_type_id(type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* last type should be BTF_KIND_FUNC_PROTO */
        if (btf->nr_types == 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        t = btf_last_type(btf);
        if (!btf_is_func_proto(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* decompose and invalidate raw data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_param);
        p = btf_add_type_mem(btf, sz);
        if (!p)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (name && name[0]) {
                name_off = btf__add_str(btf, name);
@@ -2308,21 +2307,21 @@ int btf__add_var(struct btf *btf, const char *name, int linkage, int type_id)
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (linkage != BTF_VAR_STATIC && linkage != BTF_VAR_GLOBAL_ALLOCATED &&
            linkage != BTF_VAR_GLOBAL_EXTERN)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (validate_type_id(type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* deconstruct BTF, if necessary, and invalidate raw_data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type) + sizeof(struct btf_var);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        name_off = btf__add_str(btf, name);
        if (name_off < 0)
@@ -2357,15 +2356,15 @@ int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz)
 
        /* non-empty name */
        if (!name || !name[0])
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_type);
        t = btf_add_type_mem(btf, sz);
        if (!t)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        name_off = btf__add_str(btf, name);
        if (name_off < 0)
@@ -2397,22 +2396,22 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 
        /* last type should be BTF_KIND_DATASEC */
        if (btf->nr_types == 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        t = btf_last_type(btf);
        if (!btf_is_datasec(t))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (validate_type_id(var_type_id))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* decompose and invalidate raw data */
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        sz = sizeof(struct btf_var_secinfo);
        v = btf_add_type_mem(btf, sz);
        if (!v)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        v->type = var_type_id;
        v->offset = offset;
@@ -2614,11 +2613,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 
        err = btf_ext_parse_hdr(data, size);
        if (err)
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
 
        btf_ext = calloc(1, sizeof(struct btf_ext));
        if (!btf_ext)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
 
        btf_ext->data_size = size;
        btf_ext->data = malloc(size);
@@ -2628,9 +2627,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
        }
        memcpy(btf_ext->data, data, size);
 
-       if (btf_ext->hdr->hdr_len <
-           offsetofend(struct btf_ext_header, line_info_len))
+       if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {
+               err = -EINVAL;
                goto done;
+       }
+
        err = btf_ext_setup_func_info(btf_ext);
        if (err)
                goto done;
@@ -2639,8 +2640,11 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
        if (err)
                goto done;
 
-       if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len))
+       if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) {
+               err = -EINVAL;
                goto done;
+       }
+
        err = btf_ext_setup_core_relos(btf_ext);
        if (err)
                goto done;
@@ -2648,7 +2652,7 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)
 done:
        if (err) {
                btf_ext__free(btf_ext);
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
        }
 
        return btf_ext;
@@ -2687,7 +2691,7 @@ static int btf_ext_reloc_info(const struct btf *btf,
                existing_len = (*cnt) * record_size;
                data = realloc(*info, existing_len + records_len);
                if (!data)
-                       return -ENOMEM;
+                       return libbpf_err(-ENOMEM);
 
                memcpy(data + existing_len, sinfo->data, records_len);
                /* adjust insn_off only, the rest data will be passed
@@ -2697,15 +2701,14 @@ static int btf_ext_reloc_info(const struct btf *btf,
                        __u32 *insn_off;
 
                        insn_off = data + existing_len + (i * record_size);
-                       *insn_off = *insn_off / sizeof(struct bpf_insn) +
-                               insns_cnt;
+                       *insn_off = *insn_off / sizeof(struct bpf_insn) + insns_cnt;
                }
                *info = data;
                *cnt += sinfo->num_info;
                return 0;
        }
 
-       return -ENOENT;
+       return libbpf_err(-ENOENT);
 }
 
 int btf_ext__reloc_func_info(const struct btf *btf,
@@ -2894,11 +2897,11 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 
        if (IS_ERR(d)) {
                pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (btf_ensure_modifiable(btf))
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        err = btf_dedup_prep(d);
        if (err) {
@@ -2938,7 +2941,7 @@ int btf__dedup(struct btf *btf, struct btf_ext *btf_ext,
 
 done:
        btf_dedup_free(d);
-       return err;
+       return libbpf_err(err);
 }
 
 #define BTF_UNPROCESSED_ID ((__u32)-1)
@@ -4411,7 +4414,7 @@ struct btf *libbpf_find_kernel_btf(void)
        char path[PATH_MAX + 1];
        struct utsname buf;
        struct btf *btf;
-       int i;
+       int i, err;
 
        uname(&buf);
 
@@ -4425,17 +4428,16 @@ struct btf *libbpf_find_kernel_btf(void)
                        btf = btf__parse_raw(path);
                else
                        btf = btf__parse_elf(path, NULL);
-
-               pr_debug("loading kernel BTF '%s': %ld\n",
-                        path, IS_ERR(btf) ? PTR_ERR(btf) : 0);
-               if (IS_ERR(btf))
+               err = libbpf_get_error(btf);
+               pr_debug("loading kernel BTF '%s': %d\n", path, err);
+               if (err)
                        continue;
 
                return btf;
        }
 
        pr_warn("failed to find valid kernel BTF\n");
-       return ERR_PTR(-ESRCH);
+       return libbpf_err_ptr(-ESRCH);
 }
 
 int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
index 5e2809d..5dc6b51 100644 (file)
@@ -128,7 +128,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
 
        d = calloc(1, sizeof(struct btf_dump));
        if (!d)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
 
        d->btf = btf;
        d->btf_ext = btf_ext;
@@ -156,7 +156,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
        return d;
 err:
        btf_dump__free(d);
-       return ERR_PTR(err);
+       return libbpf_err_ptr(err);
 }
 
 static int btf_dump_resize(struct btf_dump *d)
@@ -236,16 +236,16 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id)
        int err, i;
 
        if (id > btf__get_nr_types(d->btf))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = btf_dump_resize(d);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        d->emit_queue_cnt = 0;
        err = btf_dump_order_type(d, id, false);
        if (err < 0)
-               return err;
+               return libbpf_err(err);
 
        for (i = 0; i < d->emit_queue_cnt; i++)
                btf_dump_emit_type(d, d->emit_queue[i], 0 /*top-level*/);
@@ -1075,11 +1075,11 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
        int lvl, err;
 
        if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = btf_dump_resize(d);
        if (err)
-               return -EINVAL;
+               return libbpf_err(err);
 
        fname = OPTS_GET(opts, field_name, "");
        lvl = OPTS_GET(opts, indent_level, 0);
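
btf_dump__new() still hands back an ERR_PTR in legacy mode and NULL plus errno under strict mode; libbpf_get_error() copes with both, so a caller written as below (a sketch against the four-argument btf_dump__new() of this series) works either way:

    #include <stdio.h>
    #include <bpf/btf.h>
    #include <bpf/libbpf.h>

    static struct btf_dump *new_dump(const struct btf *btf,
                                     btf_dump_printf_fn_t fn)
    {
            struct btf_dump *d = btf_dump__new(btf, NULL, NULL, fn);
            long err = libbpf_get_error(d); /* handles NULL+errno and ERR_PTR */

            if (err) {
                    fprintf(stderr, "btf_dump__new: %ld\n", err);
                    return NULL;
            }
            return d;
    }
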
index 69cd1a8..48c0ade 100644 (file)
@@ -151,6 +151,23 @@ static inline __u64 ptr_to_u64(const void *ptr)
        return (__u64) (unsigned long) ptr;
 }
 
+/* this goes away in libbpf 1.0 */
+enum libbpf_strict_mode libbpf_mode = LIBBPF_STRICT_NONE;
+
+int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
+{
+       /* __LIBBPF_STRICT_LAST is one past the last power-of-2 flag value,
+        * so subtract the +1 back out and compute (2*x - 1) to get the bit
+        * mask of all valid flags
+        */
+       if (mode != LIBBPF_STRICT_ALL
+           && (mode & ~((__LIBBPF_STRICT_LAST - 1) * 2 - 1)))
+               return errno = EINVAL, -EINVAL;
+
+       libbpf_mode = mode;
+       return 0;
+}
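
Applications opt in explicitly; with LIBBPF_STRICT_ALL (or just LIBBPF_STRICT_CLEAN_PTRS) set, pointer-returning APIs yield plain NULL with errno filled in and the IS_ERR()/PTR_ERR() dance disappears. A usage sketch (prog.bpf.o is a placeholder object file):

    #include <bpf/libbpf.h>

    int main(void)
    {
            struct bpf_object *obj;

            if (libbpf_set_strict_mode(LIBBPF_STRICT_ALL))
                    return 1;

            obj = bpf_object__open("prog.bpf.o");
            if (!obj)       /* no IS_ERR()/PTR_ERR() needed anymore */
                    return 1;

            bpf_object__close(obj);
            return 0;
    }
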
+
 enum kern_feature_id {
        /* v4.14: kernel support for program & map names. */
        FEAT_PROG_NAME,
@@ -2448,10 +2465,8 @@ static int bpf_object__init_maps(struct bpf_object *obj,
        err = err ?: bpf_object__init_global_data_maps(obj);
        err = err ?: bpf_object__init_kconfig_map(obj);
        err = err ?: bpf_object__init_struct_ops_maps(obj);
-       if (err)
-               return err;
 
-       return 0;
+       return err;
 }
 
 static bool section_have_execinstr(struct bpf_object *obj, int idx)
@@ -2562,16 +2577,14 @@ static int bpf_object__init_btf(struct bpf_object *obj,
 
        if (btf_data) {
                obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
-               if (IS_ERR(obj->btf)) {
-                       err = PTR_ERR(obj->btf);
+               err = libbpf_get_error(obj->btf);
+               if (err) {
                        obj->btf = NULL;
-                       pr_warn("Error loading ELF section %s: %d.\n",
-                               BTF_ELF_SEC, err);
+                       pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
                        goto out;
                }
                /* enforce 8-byte pointers for BPF-targeted BTFs */
                btf__set_pointer_size(obj->btf, 8);
-               err = 0;
        }
        if (btf_ext_data) {
                if (!obj->btf) {
@@ -2579,11 +2592,11 @@ static int bpf_object__init_btf(struct bpf_object *obj,
                                 BTF_EXT_ELF_SEC, BTF_ELF_SEC);
                        goto out;
                }
-               obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
-                                           btf_ext_data->d_size);
-               if (IS_ERR(obj->btf_ext)) {
-                       pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n",
-                               BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
+               obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
+               err = libbpf_get_error(obj->btf_ext);
+               if (err) {
+                       pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n",
+                               BTF_EXT_ELF_SEC, err);
                        obj->btf_ext = NULL;
                        goto out;
                }
@@ -2667,8 +2680,8 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
                return 0;
 
        obj->btf_vmlinux = libbpf_find_kernel_btf();
-       if (IS_ERR(obj->btf_vmlinux)) {
-               err = PTR_ERR(obj->btf_vmlinux);
+       err = libbpf_get_error(obj->btf_vmlinux);
+       if (err) {
                pr_warn("Error loading vmlinux BTF: %d\n", err);
                obj->btf_vmlinux = NULL;
                return err;
@@ -2734,8 +2747,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
                /* clone BTF to sanitize a copy and leave the original intact */
                raw_data = btf__get_raw_data(obj->btf, &sz);
                kern_btf = btf__new(raw_data, sz);
-               if (IS_ERR(kern_btf))
-                       return PTR_ERR(kern_btf);
+               err = libbpf_get_error(kern_btf);
+               if (err)
+                       return err;
 
                /* enforce 8-byte pointers for BPF-targeted BTFs */
                btf__set_pointer_size(obj->btf, 8);
@@ -3509,7 +3523,7 @@ bpf_object__find_program_by_title(const struct bpf_object *obj,
                if (pos->sec_name && !strcmp(pos->sec_name, title))
                        return pos;
        }
-       return NULL;
+       return errno = ENOENT, NULL;
 }
 
 static bool prog_is_subprog(const struct bpf_object *obj,
@@ -3542,7 +3556,7 @@ bpf_object__find_program_by_name(const struct bpf_object *obj,
                if (!strcmp(prog->name, name))
                        return prog;
        }
-       return NULL;
+       return errno = ENOENT, NULL;
 }
 
 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
@@ -3889,11 +3903,11 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 
        err = bpf_obj_get_info_by_fd(fd, &info, &len);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        new_name = strdup(info.name);
        if (!new_name)
-               return -errno;
+               return libbpf_err(-errno);
 
        new_fd = open("/", O_RDONLY | O_CLOEXEC);
        if (new_fd < 0) {
@@ -3931,7 +3945,7 @@ err_close_new_fd:
        close(new_fd);
 err_free_new_name:
        free(new_name);
-       return err;
+       return libbpf_err(err);
 }
 
 __u32 bpf_map__max_entries(const struct bpf_map *map)
@@ -3942,7 +3956,7 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 {
        if (!bpf_map_type__is_map_in_map(map->def.type))
-               return NULL;
+               return errno = EINVAL, NULL;
 
        return map->inner_map;
 }
@@ -3950,7 +3964,7 @@ struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.max_entries = max_entries;
        return 0;
 }
@@ -3958,7 +3972,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
 {
        if (!map || !max_entries)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        return bpf_map__set_max_entries(map, max_entries);
 }
@@ -3974,6 +3988,9 @@ bpf_object__probe_loading(struct bpf_object *obj)
        };
        int ret;
 
+       if (obj->gen_loader)
+               return 0;
+
        /* make sure basic loading works */
 
        memset(&attr, 0, sizeof(attr));
@@ -4565,7 +4582,7 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)
                targ_map = map->init_slots[i];
                fd = bpf_map__fd(targ_map);
                if (obj->gen_loader) {
-                       pr_warn("// TODO map_update_elem: idx %ld key %d value==map_idx %ld\n",
+                       pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n",
                                map - obj->maps, i, targ_map - obj->maps);
                        return -ENOTSUP;
                } else {
@@ -5086,10 +5103,10 @@ static int load_module_btfs(struct bpf_object *obj)
                }
 
                btf = btf_get_from_fd(fd, obj->btf_vmlinux);
-               if (IS_ERR(btf)) {
-                       pr_warn("failed to load module [%s]'s BTF object #%d: %ld\n",
-                               name, id, PTR_ERR(btf));
-                       err = PTR_ERR(btf);
+               err = libbpf_get_error(btf);
+               if (err) {
+                       pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
+                               name, id, err);
                        goto err_out;
                }
 
@@ -6189,7 +6206,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog,
                return -EINVAL;
 
        if (prog->obj->gen_loader) {
-               pr_warn("// TODO core_relo: prog %ld insn[%d] %s %s kind %d\n",
+               pr_warn("// TODO core_relo: prog %td insn[%d] %s %s kind %d\n",
                        prog - prog->obj->programs, relo->insn_off / 8,
                        local_name, spec_str, relo->kind);
                return -ENOTSUP;
@@ -6349,8 +6366,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
 
        if (targ_btf_path) {
                obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
-               if (IS_ERR_OR_NULL(obj->btf_vmlinux_override)) {
-                       err = PTR_ERR(obj->btf_vmlinux_override);
+               err = libbpf_get_error(obj->btf_vmlinux_override);
+               if (err) {
                        pr_warn("failed to parse target BTF: %d\n", err);
                        return err;
                }
@@ -7407,7 +7424,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 
        if (prog->obj->loaded) {
                pr_warn("prog '%s': can't load after object was loaded\n", prog->name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if ((prog->type == BPF_PROG_TYPE_TRACING ||
@@ -7417,7 +7434,7 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
 
                err = libbpf_find_attach_btf_id(prog, &btf_obj_fd, &btf_type_id);
                if (err)
-                       return err;
+                       return libbpf_err(err);
 
                prog->attach_btf_obj_fd = btf_obj_fd;
                prog->attach_btf_id = btf_type_id;
@@ -7427,13 +7444,13 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
                if (prog->preprocessor) {
                        pr_warn("Internal error: can't load program '%s'\n",
                                prog->name);
-                       return -LIBBPF_ERRNO__INTERNAL;
+                       return libbpf_err(-LIBBPF_ERRNO__INTERNAL);
                }
 
                prog->instances.fds = malloc(sizeof(int));
                if (!prog->instances.fds) {
                        pr_warn("Not enough memory for BPF fds\n");
-                       return -ENOMEM;
+                       return libbpf_err(-ENOMEM);
                }
                prog->instances.nr = 1;
                prog->instances.fds[0] = -1;
@@ -7492,7 +7509,7 @@ out:
                pr_warn("failed to load program '%s'\n", prog->name);
        zfree(&prog->insns);
        prog->insns_cnt = 0;
-       return err;
+       return libbpf_err(err);
 }
 
 static int
@@ -7625,7 +7642,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)
 
 struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
 {
-       return __bpf_object__open_xattr(attr, 0);
+       return libbpf_ptr(__bpf_object__open_xattr(attr, 0));
 }
 
 struct bpf_object *bpf_object__open(const char *path)
@@ -7635,18 +7652,18 @@ struct bpf_object *bpf_object__open(const char *path)
                .prog_type      = BPF_PROG_TYPE_UNSPEC,
        };
 
-       return bpf_object__open_xattr(&attr);
+       return libbpf_ptr(__bpf_object__open_xattr(&attr, 0));
 }
 
 struct bpf_object *
 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
 {
        if (!path)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        pr_debug("loading %s\n", path);
 
-       return __bpf_object__open(path, NULL, 0, opts);
+       return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts));
 }
 
 struct bpf_object *
@@ -7654,9 +7671,9 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
                     const struct bpf_object_open_opts *opts)
 {
        if (!obj_buf || obj_buf_sz == 0)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
-       return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts);
+       return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts));
 }
 
 struct bpf_object *
@@ -7671,9 +7688,9 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,
 
        /* returning NULL is wrong, but backwards-compatible */
        if (!obj_buf || obj_buf_sz == 0)
-               return NULL;
+               return errno = EINVAL, NULL;
 
-       return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);
+       return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts));
 }
 
 int bpf_object__unload(struct bpf_object *obj)
@@ -7681,7 +7698,7 @@ int bpf_object__unload(struct bpf_object *obj)
        size_t i;
 
        if (!obj)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        for (i = 0; i < obj->nr_maps; i++) {
                zclose(obj->maps[i].fd);
@@ -8014,14 +8031,14 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
        int err, i;
 
        if (!attr)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        obj = attr->obj;
        if (!obj)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (obj->loaded) {
                pr_warn("object '%s': load can't be attempted twice\n", obj->name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (obj->gen_loader)
@@ -8072,7 +8089,7 @@ out:
 
        bpf_object__unload(obj);
        pr_warn("failed to load object '%s'\n", obj->path);
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_object__load(struct bpf_object *obj)
@@ -8144,28 +8161,28 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
 
        err = make_parent_dir(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (prog == NULL) {
                pr_warn("invalid program pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (instance < 0 || instance >= prog->instances.nr) {
                pr_warn("invalid prog instance %d of prog %s (max %d)\n",
                        instance, prog->name, prog->instances.nr);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (bpf_obj_pin(prog->instances.fds[instance], path)) {
                err = -errno;
                cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                pr_warn("failed to pin program: %s\n", cp);
-               return err;
+               return libbpf_err(err);
        }
        pr_debug("pinned program '%s'\n", path);
 
@@ -8179,22 +8196,23 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (prog == NULL) {
                pr_warn("invalid program pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (instance < 0 || instance >= prog->instances.nr) {
                pr_warn("invalid prog instance %d of prog %s (max %d)\n",
                        instance, prog->name, prog->instances.nr);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        err = unlink(path);
        if (err != 0)
-               return -errno;
+               return libbpf_err(-errno);
+
        pr_debug("unpinned program '%s'\n", path);
 
        return 0;
@@ -8206,20 +8224,20 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)
 
        err = make_parent_dir(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (prog == NULL) {
                pr_warn("invalid program pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (prog->instances.nr <= 0) {
                pr_warn("no instances of prog %s to pin\n", prog->name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (prog->instances.nr == 1) {
@@ -8263,7 +8281,7 @@ err_unpin:
 
        rmdir(path);
 
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_program__unpin(struct bpf_program *prog, const char *path)
@@ -8272,16 +8290,16 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (prog == NULL) {
                pr_warn("invalid program pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (prog->instances.nr <= 0) {
                pr_warn("no instances of prog %s to pin\n", prog->name);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (prog->instances.nr == 1) {
@@ -8295,9 +8313,9 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 
                len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
                if (len < 0)
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                else if (len >= PATH_MAX)
-                       return -ENAMETOOLONG;
+                       return libbpf_err(-ENAMETOOLONG);
 
                err = bpf_program__unpin_instance(prog, buf, i);
                if (err)
@@ -8306,7 +8324,7 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)
 
        err = rmdir(path);
        if (err)
-               return -errno;
+               return libbpf_err(-errno);
 
        return 0;
 }
@@ -8318,14 +8336,14 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 
        if (map == NULL) {
                pr_warn("invalid map pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (map->pin_path) {
                if (path && strcmp(path, map->pin_path)) {
                        pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
                                bpf_map__name(map), map->pin_path, path);
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                } else if (map->pinned) {
                        pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
                                 bpf_map__name(map), map->pin_path);
@@ -8335,10 +8353,10 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
                if (!path) {
                        pr_warn("missing a path to pin map '%s' at\n",
                                bpf_map__name(map));
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                } else if (map->pinned) {
                        pr_warn("map '%s' already pinned\n", bpf_map__name(map));
-                       return -EEXIST;
+                       return libbpf_err(-EEXIST);
                }
 
                map->pin_path = strdup(path);
@@ -8350,11 +8368,11 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 
        err = make_parent_dir(map->pin_path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = check_path(map->pin_path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        if (bpf_obj_pin(map->fd, map->pin_path)) {
                err = -errno;
@@ -8369,7 +8387,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path)
 out_err:
        cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
        pr_warn("failed to pin map: %s\n", cp);
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_map__unpin(struct bpf_map *map, const char *path)
@@ -8378,29 +8396,29 @@ int bpf_map__unpin(struct bpf_map *map, const char *path)
 
        if (map == NULL) {
                pr_warn("invalid map pointer\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        if (map->pin_path) {
                if (path && strcmp(path, map->pin_path)) {
                        pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
                                bpf_map__name(map), map->pin_path, path);
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                }
                path = map->pin_path;
        } else if (!path) {
                pr_warn("no path to unpin map '%s' from\n",
                        bpf_map__name(map));
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = unlink(path);
        if (err != 0)
-               return -errno;
+               return libbpf_err(-errno);
 
        map->pinned = false;
        pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
@@ -8415,7 +8433,7 @@ int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
        if (path) {
                new = strdup(path);
                if (!new)
-                       return -errno;
+                       return libbpf_err(-errno);
        }
 
        free(map->pin_path);
@@ -8449,11 +8467,11 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
        int err;
 
        if (!obj)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        if (!obj->loaded) {
                pr_warn("object not yet loaded; load it first\n");
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        }
 
        bpf_object__for_each_map(map, obj) {
@@ -8493,7 +8511,7 @@ err_unpin_maps:
                bpf_map__unpin(map, NULL);
        }
 
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
@@ -8502,7 +8520,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
        int err;
 
        if (!obj)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        bpf_object__for_each_map(map, obj) {
                char *pin_path = NULL;
@@ -8514,9 +8532,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
                        len = snprintf(buf, PATH_MAX, "%s/%s", path,
                                       bpf_map__name(map));
                        if (len < 0)
-                               return -EINVAL;
+                               return libbpf_err(-EINVAL);
                        else if (len >= PATH_MAX)
-                               return -ENAMETOOLONG;
+                               return libbpf_err(-ENAMETOOLONG);
                        sanitize_pin_path(buf);
                        pin_path = buf;
                } else if (!map->pin_path) {
@@ -8525,7 +8543,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 
                err = bpf_map__unpin(map, pin_path);
                if (err)
-                       return err;
+                       return libbpf_err(err);
        }
 
        return 0;
@@ -8537,11 +8555,11 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
        int err;
 
        if (!obj)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        if (!obj->loaded) {
                pr_warn("object not yet loaded; load it first\n");
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        }
 
        bpf_object__for_each_program(prog, obj) {
@@ -8580,7 +8598,7 @@ err_unpin_programs:
                bpf_program__unpin(prog, buf);
        }
 
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
@@ -8589,7 +8607,7 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
        int err;
 
        if (!obj)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        bpf_object__for_each_program(prog, obj) {
                char buf[PATH_MAX];
@@ -8598,13 +8616,13 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
                len = snprintf(buf, PATH_MAX, "%s/%s", path,
                               prog->pin_name);
                if (len < 0)
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                else if (len >= PATH_MAX)
-                       return -ENAMETOOLONG;
+                       return libbpf_err(-ENAMETOOLONG);
 
                err = bpf_program__unpin(prog, buf);
                if (err)
-                       return err;
+                       return libbpf_err(err);
        }
 
        return 0;
@@ -8616,12 +8634,12 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 
        err = bpf_object__pin_maps(obj, path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        err = bpf_object__pin_programs(obj, path);
        if (err) {
                bpf_object__unpin_maps(obj, path);
-               return err;
+               return libbpf_err(err);
        }
 
        return 0;
@@ -8718,7 +8736,7 @@ bpf_object__next(struct bpf_object *prev)
 
 const char *bpf_object__name(const struct bpf_object *obj)
 {
-       return obj ? obj->name : ERR_PTR(-EINVAL);
+       return obj ? obj->name : libbpf_err_ptr(-EINVAL);
 }
 
 unsigned int bpf_object__kversion(const struct bpf_object *obj)
@@ -8739,7 +8757,7 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
 {
        if (obj->loaded)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        obj->kern_version = kern_version;
 
@@ -8759,7 +8777,7 @@ int bpf_object__set_priv(struct bpf_object *obj, void *priv,
 
 void *bpf_object__priv(const struct bpf_object *obj)
 {
-       return obj ? obj->priv : ERR_PTR(-EINVAL);
+       return obj ? obj->priv : libbpf_err_ptr(-EINVAL);
 }
 
 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
@@ -8795,7 +8813,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
 
        if (p->obj != obj) {
                pr_warn("error: program handler doesn't match object\n");
-               return NULL;
+               return errno = EINVAL, NULL;
        }
 
        idx = (p - obj->programs) + (forward ? 1 : -1);
@@ -8841,7 +8859,7 @@ int bpf_program__set_priv(struct bpf_program *prog, void *priv,
 
 void *bpf_program__priv(const struct bpf_program *prog)
 {
-       return prog ? prog->priv : ERR_PTR(-EINVAL);
+       return prog ? prog->priv : libbpf_err_ptr(-EINVAL);
 }
 
 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
@@ -8868,7 +8886,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
                title = strdup(title);
                if (!title) {
                        pr_warn("failed to strdup program title\n");
-                       return ERR_PTR(-ENOMEM);
+                       return libbpf_err_ptr(-ENOMEM);
                }
        }
 
@@ -8883,7 +8901,7 @@ bool bpf_program__autoload(const struct bpf_program *prog)
 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 {
        if (prog->obj->loaded)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        prog->load = autoload;
        return 0;
@@ -8905,17 +8923,17 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
        int *instances_fds;
 
        if (nr_instances <= 0 || !prep)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (prog->instances.nr > 0 || prog->instances.fds) {
                pr_warn("Can't set pre-processor after loading\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        instances_fds = malloc(sizeof(int) * nr_instances);
        if (!instances_fds) {
                pr_warn("alloc memory failed for fds\n");
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
        }
 
        /* fill all fd with -1 */
@@ -8932,19 +8950,19 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
        int fd;
 
        if (!prog)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (n >= prog->instances.nr || n < 0) {
                pr_warn("Can't get the %dth fd from program %s: only %d instances\n",
                        n, prog->name, prog->instances.nr);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        fd = prog->instances.fds[n];
        if (fd < 0) {
                pr_warn("%dth instance of program '%s' is invalid\n",
                        n, prog->name);
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        }
 
        return fd;
@@ -8970,7 +8988,7 @@ static bool bpf_program__is_type(const struct bpf_program *prog,
 int bpf_program__set_##NAME(struct bpf_program *prog)          \
 {                                                              \
        if (!prog)                                              \
-               return -EINVAL;                                 \
+               return libbpf_err(-EINVAL);                     \
        bpf_program__set_type(prog, TYPE);                      \
        return 0;                                               \
 }                                                              \
@@ -9060,7 +9078,10 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
 
 static const struct bpf_sec_def section_defs[] = {
        BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
-       BPF_PROG_SEC("sk_reuseport",            BPF_PROG_TYPE_SK_REUSEPORT),
+       BPF_EAPROG_SEC("sk_reuseport/migrate",  BPF_PROG_TYPE_SK_REUSEPORT,
+                                               BPF_SK_REUSEPORT_SELECT_OR_MIGRATE),
+       BPF_EAPROG_SEC("sk_reuseport",          BPF_PROG_TYPE_SK_REUSEPORT,
+                                               BPF_SK_REUSEPORT_SELECT),
        SEC_DEF("kprobe/", KPROBE,
                .attach_fn = attach_kprobe),
        BPF_PROG_SEC("uprobe/",                 BPF_PROG_TYPE_KPROBE),
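
The "sk_reuseport/migrate" section name added above makes libbpf set expected_attach_type to BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, so the program is also consulted when TCP child sockets are migrated off a closing listener. A minimal sketch of such a program, assuming a hypothetical reuseport_map and a trivial always-pick-slot-0 selection policy:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
	__uint(max_entries, 16);
	__type(key, __u32);
	__type(value, __u64);
} reuseport_map SEC(".maps");

SEC("sk_reuseport/migrate")
int select_or_migrate(struct sk_reuseport_md *reuse_md)
{
	__u32 index = 0; /* illustrative policy: always pick slot 0 */

	bpf_sk_select_reuseport(reuse_md, &reuseport_map, &index, 0);
	return SK_PASS;
}

char _license[] SEC("license") = "GPL";
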
@@ -9257,7 +9278,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
        char *type_names;
 
        if (!name)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        sec_def = find_sec_def(name);
        if (sec_def) {
@@ -9273,7 +9294,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
                free(type_names);
        }
 
-       return -ESRCH;
+       return libbpf_err(-ESRCH);
 }
 
 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
@@ -9471,9 +9492,10 @@ int libbpf_find_vmlinux_btf_id(const char *name,
        int err;
 
        btf = libbpf_find_kernel_btf();
-       if (IS_ERR(btf)) {
+       err = libbpf_get_error(btf);
+       if (err) {
                pr_warn("vmlinux BTF is not found\n");
-               return -EINVAL;
+               return libbpf_err(err);
        }
 
        err = find_attach_btf_id(btf, name, attach_type);
@@ -9481,7 +9503,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
                pr_warn("%s is not found in vmlinux BTF\n", name);
 
        btf__free(btf);
-       return err;
+       return libbpf_err(err);
 }
 
 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
@@ -9492,10 +9514,11 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
        int err = -EINVAL;
 
        info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
-       if (IS_ERR_OR_NULL(info_linear)) {
+       err = libbpf_get_error(info_linear);
+       if (err) {
                pr_warn("failed get_prog_info_linear for FD %d\n",
                        attach_prog_fd);
-               return -EINVAL;
+               return err;
        }
        info = &info_linear->info;
        if (!info->btf_id) {
@@ -9616,13 +9639,13 @@ int libbpf_attach_type_by_name(const char *name,
        int i;
 
        if (!name)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
                if (strncmp(name, section_defs[i].sec, section_defs[i].len))
                        continue;
                if (!section_defs[i].is_attachable)
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                *attach_type = section_defs[i].expected_attach_type;
                return 0;
        }
@@ -9633,17 +9656,17 @@ int libbpf_attach_type_by_name(const char *name,
                free(type_names);
        }
 
-       return -EINVAL;
+       return libbpf_err(-EINVAL);
 }
 
 int bpf_map__fd(const struct bpf_map *map)
 {
-       return map ? map->fd : -EINVAL;
+       return map ? map->fd : libbpf_err(-EINVAL);
 }
 
 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
 {
-       return map ? &map->def : ERR_PTR(-EINVAL);
+       return map ? &map->def : libbpf_err_ptr(-EINVAL);
 }
 
 const char *bpf_map__name(const struct bpf_map *map)
@@ -9659,7 +9682,7 @@ enum bpf_map_type bpf_map__type(const struct bpf_map *map)
 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.type = type;
        return 0;
 }
@@ -9672,7 +9695,7 @@ __u32 bpf_map__map_flags(const struct bpf_map *map)
 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.map_flags = flags;
        return 0;
 }
@@ -9685,7 +9708,7 @@ __u32 bpf_map__numa_node(const struct bpf_map *map)
 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->numa_node = numa_node;
        return 0;
 }
@@ -9698,7 +9721,7 @@ __u32 bpf_map__key_size(const struct bpf_map *map)
 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.key_size = size;
        return 0;
 }
@@ -9711,7 +9734,7 @@ __u32 bpf_map__value_size(const struct bpf_map *map)
 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->def.value_size = size;
        return 0;
 }
@@ -9730,7 +9753,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
                     bpf_map_clear_priv_t clear_priv)
 {
        if (!map)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (map->priv) {
                if (map->clear_priv)
@@ -9744,7 +9767,7 @@ int bpf_map__set_priv(struct bpf_map *map, void *priv,
 
 void *bpf_map__priv(const struct bpf_map *map)
 {
-       return map ? map->priv : ERR_PTR(-EINVAL);
+       return map ? map->priv : libbpf_err_ptr(-EINVAL);
 }
 
 int bpf_map__set_initial_value(struct bpf_map *map,
@@ -9752,7 +9775,7 @@ int bpf_map__set_initial_value(struct bpf_map *map,
 {
        if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
            size != map->def.value_size || map->fd >= 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        memcpy(map->mmaped, data, size);
        return 0;
@@ -9784,7 +9807,7 @@ __u32 bpf_map__ifindex(const struct bpf_map *map)
 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
 {
        if (map->fd >= 0)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        map->map_ifindex = ifindex;
        return 0;
 }
@@ -9793,11 +9816,11 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
 {
        if (!bpf_map_type__is_map_in_map(map->def.type)) {
                pr_warn("error: unsupported map type\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
        if (map->inner_map_fd != -1) {
                pr_warn("error: inner_map_fd already specified\n");
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
        zfree(&map->inner_map);
        map->inner_map_fd = fd;
@@ -9811,7 +9834,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
        struct bpf_map *s, *e;
 
        if (!obj || !obj->maps)
-               return NULL;
+               return errno = EINVAL, NULL;
 
        s = obj->maps;
        e = obj->maps + obj->nr_maps;
@@ -9819,7 +9842,7 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
        if ((m < s) || (m >= e)) {
                pr_warn("error in %s: map handler doesn't belong to object\n",
                         __func__);
-               return NULL;
+               return errno = EINVAL, NULL;
        }
 
        idx = (m - obj->maps) + i;
@@ -9858,7 +9881,7 @@ bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
                if (pos->name && !strcmp(pos->name, name))
                        return pos;
        }
-       return NULL;
+       return errno = ENOENT, NULL;
 }
 
 int
@@ -9870,12 +9893,23 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
 struct bpf_map *
 bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
 {
-       return ERR_PTR(-ENOTSUP);
+       return libbpf_err_ptr(-ENOTSUP);
 }
 
 long libbpf_get_error(const void *ptr)
 {
-       return PTR_ERR_OR_ZERO(ptr);
+       if (!IS_ERR_OR_NULL(ptr))
+               return 0;
+
+       if (IS_ERR(ptr))
+               errno = -PTR_ERR(ptr);
+
+       /* If ptr == NULL, then errno should already be set by the failing
+        * API, because libbpf never returns NULL on success and it now always
+        * sets errno on error. So no extra errno handling for ptr == NULL
+        * case.
+        */
+       return -errno;
 }
 
 int bpf_prog_load(const char *file, enum bpf_prog_type type,
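
The reworked libbpf_get_error() keeps a single error-checking idiom working across both conventions: ERR_PTR-encoded pointers in legacy mode, and NULL-plus-errno once LIBBPF_STRICT_CLEAN_PTRS is enabled. A hedged caller-side sketch (the object file name is an assumption):

#include <bpf/libbpf.h>
#include <stdio.h>

int open_object_example(void)
{
	struct bpf_object *obj;
	long err;

	obj = bpf_object__open("prog.bpf.o"); /* hypothetical object file */
	err = libbpf_get_error(obj);          /* handles ERR_PTR and NULL+errno */
	if (err) {
		fprintf(stderr, "failed to open object: %ld\n", err);
		return (int)err;
	}

	bpf_object__close(obj);
	return 0;
}
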
@@ -9901,16 +9935,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        int err;
 
        if (!attr)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (!attr->file)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        open_attr.file = attr->file;
        open_attr.prog_type = attr->prog_type;
 
        obj = bpf_object__open_xattr(&open_attr);
-       if (IS_ERR_OR_NULL(obj))
-               return -ENOENT;
+       err = libbpf_get_error(obj);
+       if (err)
+               return libbpf_err(-ENOENT);
 
        bpf_object__for_each_program(prog, obj) {
                enum bpf_attach_type attach_type = attr->expected_attach_type;
@@ -9930,7 +9965,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
                         * didn't provide a fallback type, too bad...
                         */
                        bpf_object__close(obj);
-                       return -EINVAL;
+                       return libbpf_err(-EINVAL);
                }
 
                prog->prog_ifindex = attr->ifindex;
@@ -9948,13 +9983,13 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
        if (!first_prog) {
                pr_warn("object file doesn't contain bpf program\n");
                bpf_object__close(obj);
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        }
 
        err = bpf_object__load(obj);
        if (err) {
                bpf_object__close(obj);
-               return err;
+               return libbpf_err(err);
        }
 
        *pobj = obj;
@@ -9973,7 +10008,10 @@ struct bpf_link {
 /* Replace link's underlying BPF program with the new one */
 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
 {
-       return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+       int ret;
+
+       ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+       return libbpf_err_errno(ret);
 }
 
 /* Release "ownership" of underlying BPF resource (typically, BPF program
@@ -10006,7 +10044,7 @@ int bpf_link__destroy(struct bpf_link *link)
                free(link->pin_path);
        free(link);
 
-       return err;
+       return libbpf_err(err);
 }
 
 int bpf_link__fd(const struct bpf_link *link)
@@ -10021,7 +10059,7 @@ const char *bpf_link__pin_path(const struct bpf_link *link)
 
 static int bpf_link__detach_fd(struct bpf_link *link)
 {
-       return close(link->fd);
+       return libbpf_err_errno(close(link->fd));
 }
 
 struct bpf_link *bpf_link__open(const char *path)
@@ -10033,13 +10071,13 @@ struct bpf_link *bpf_link__open(const char *path)
        if (fd < 0) {
                fd = -errno;
                pr_warn("failed to open link at %s: %d\n", path, fd);
-               return ERR_PTR(fd);
+               return libbpf_err_ptr(fd);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link) {
                close(fd);
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        }
        link->detach = &bpf_link__detach_fd;
        link->fd = fd;
@@ -10047,7 +10085,7 @@ struct bpf_link *bpf_link__open(const char *path)
        link->pin_path = strdup(path);
        if (!link->pin_path) {
                bpf_link__destroy(link);
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        }
 
        return link;
@@ -10063,22 +10101,22 @@ int bpf_link__pin(struct bpf_link *link, const char *path)
        int err;
 
        if (link->pin_path)
-               return -EBUSY;
+               return libbpf_err(-EBUSY);
        err = make_parent_dir(path);
        if (err)
-               return err;
+               return libbpf_err(err);
        err = check_path(path);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        link->pin_path = strdup(path);
        if (!link->pin_path)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
 
        if (bpf_obj_pin(link->fd, link->pin_path)) {
                err = -errno;
                zfree(&link->pin_path);
-               return err;
+               return libbpf_err(err);
        }
 
        pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
@@ -10090,11 +10128,11 @@ int bpf_link__unpin(struct bpf_link *link)
        int err;
 
        if (!link->pin_path)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = unlink(link->pin_path);
        if (err != 0)
-               return -errno;
+               return libbpf_err_errno(err);
 
        pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
        zfree(&link->pin_path);
@@ -10110,11 +10148,10 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
                err = -errno;
 
        close(link->fd);
-       return err;
+       return libbpf_err(err);
 }
 
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
-                                               int pfd)
+struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
 {
        char errmsg[STRERR_BUFSIZE];
        struct bpf_link *link;
@@ -10123,18 +10160,18 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
        if (pfd < 0) {
                pr_warn("prog '%s': invalid perf event FD %d\n",
                        prog->name, pfd);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
                        prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_perf_event;
        link->fd = pfd;
 
@@ -10146,14 +10183,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
                if (err == -EPROTO)
                        pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
                                prog->name, pfd);
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
        }
        if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
                err = -errno;
                free(link);
                pr_warn("prog '%s': failed to enable pfd %d: %s\n",
                        prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
        }
        return link;
 }
@@ -10277,16 +10314,16 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
                pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
                        prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link = bpf_program__attach_perf_event(prog, pfd);
-       if (IS_ERR(link)) {
+       err = libbpf_get_error(link);
+       if (err) {
                close(pfd);
-               err = PTR_ERR(link);
                pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
                        prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return link;
+               return libbpf_err_ptr(err);
        }
        return link;
 }
@@ -10319,17 +10356,17 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
                        prog->name, retprobe ? "uretprobe" : "uprobe",
                        binary_path, func_offset,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link = bpf_program__attach_perf_event(prog, pfd);
-       if (IS_ERR(link)) {
+       err = libbpf_get_error(link);
+       if (err) {
                close(pfd);
-               err = PTR_ERR(link);
                pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
                        prog->name, retprobe ? "uretprobe" : "uprobe",
                        binary_path, func_offset,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return link;
+               return libbpf_err_ptr(err);
        }
        return link;
 }
@@ -10397,16 +10434,16 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
                pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
                        prog->name, tp_category, tp_name,
                        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link = bpf_program__attach_perf_event(prog, pfd);
-       if (IS_ERR(link)) {
+       err = libbpf_get_error(link);
+       if (err) {
                close(pfd);
-               err = PTR_ERR(link);
                pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
                        prog->name, tp_category, tp_name,
                        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
-               return link;
+               return libbpf_err_ptr(err);
        }
        return link;
 }
@@ -10419,20 +10456,19 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
 
        sec_name = strdup(prog->sec_name);
        if (!sec_name)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
 
        /* extract "tp/<category>/<name>" */
        tp_cat = sec_name + sec->len;
        tp_name = strchr(tp_cat, '/');
        if (!tp_name) {
-               link = ERR_PTR(-EINVAL);
-               goto out;
+               free(sec_name);
+               return libbpf_err_ptr(-EINVAL);
        }
        *tp_name = '\0';
        tp_name++;
 
        link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
-out:
        free(sec_name);
        return link;
 }
@@ -10447,12 +10483,12 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_fd;
 
        pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
@@ -10461,7 +10497,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
                free(link);
                pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
                        prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link->fd = pfd;
        return link;
@@ -10485,12 +10521,12 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_fd;
 
        pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
@@ -10499,7 +10535,7 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
                free(link);
                pr_warn("prog '%s': failed to attach: %s\n",
                        prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(pfd);
+               return libbpf_err_ptr(pfd);
        }
        link->fd = pfd;
        return (struct bpf_link *)link;
@@ -10527,12 +10563,6 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
        return bpf_program__attach_lsm(prog);
 }
 
-static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
-                                   struct bpf_program *prog)
-{
-       return bpf_program__attach_iter(prog, NULL);
-}
-
 static struct bpf_link *
 bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
                       const char *target_name)
@@ -10547,12 +10577,12 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_fd;
 
        attach_type = bpf_program__get_expected_attach_type(prog);
@@ -10563,7 +10593,7 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
                pr_warn("prog '%s': failed to attach to %s: %s\n",
                        prog->name, target_name,
                        libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(link_fd);
+               return libbpf_err_ptr(link_fd);
        }
        link->fd = link_fd;
        return link;
@@ -10596,19 +10626,19 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
        if (!!target_fd != !!attach_func_name) {
                pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
                        prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        if (prog->type != BPF_PROG_TYPE_EXT) {
                pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
                        prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        if (target_fd) {
                btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
                if (btf_id < 0)
-                       return ERR_PTR(btf_id);
+                       return libbpf_err_ptr(btf_id);
 
                return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
        } else {
@@ -10630,7 +10660,7 @@ bpf_program__attach_iter(struct bpf_program *prog,
        __u32 target_fd = 0;
 
        if (!OPTS_VALID(opts, bpf_iter_attach_opts))
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
        link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
@@ -10638,12 +10668,12 @@ bpf_program__attach_iter(struct bpf_program *prog,
        prog_fd = bpf_program__fd(prog);
        if (prog_fd < 0) {
                pr_warn("prog '%s': can't attach before loaded\n", prog->name);
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
        }
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
        link->detach = &bpf_link__detach_fd;
 
        link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
@@ -10653,19 +10683,25 @@ bpf_program__attach_iter(struct bpf_program *prog,
                free(link);
                pr_warn("prog '%s': failed to attach to iterator: %s\n",
                        prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
-               return ERR_PTR(link_fd);
+               return libbpf_err_ptr(link_fd);
        }
        link->fd = link_fd;
        return link;
 }
 
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+                                   struct bpf_program *prog)
+{
+       return bpf_program__attach_iter(prog, NULL);
+}
+
 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
 {
        const struct bpf_sec_def *sec_def;
 
        sec_def = find_sec_def(prog->sec_name);
        if (!sec_def || !sec_def->attach_fn)
-               return ERR_PTR(-ESRCH);
+               return libbpf_err_ptr(-ESRCH);
 
        return sec_def->attach_fn(sec_def, prog);
 }
@@ -10688,11 +10724,11 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
        int err;
 
        if (!bpf_map__is_struct_ops(map) || map->fd == -1)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        link = calloc(1, sizeof(*link));
        if (!link)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        st_ops = map->st_ops;
        for (i = 0; i < btf_vlen(st_ops->type); i++) {
@@ -10712,7 +10748,7 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
        if (err) {
                err = -errno;
                free(link);
-               return ERR_PTR(err);
+               return libbpf_err_ptr(err);
        }
 
        link->detach = bpf_link__detach_struct_ops;
@@ -10766,7 +10802,7 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
        }
 
        ring_buffer_write_tail(header, data_tail);
-       return ret;
+       return libbpf_err(ret);
 }
 
 struct perf_buffer;
@@ -10919,7 +10955,7 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
        p.lost_cb = opts ? opts->lost_cb : NULL;
        p.ctx = opts ? opts->ctx : NULL;
 
-       return __perf_buffer__new(map_fd, page_cnt, &p);
+       return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
 
 struct perf_buffer *
@@ -10935,7 +10971,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
        p.cpus = opts->cpus;
        p.map_keys = opts->map_keys;
 
-       return __perf_buffer__new(map_fd, page_cnt, &p);
+       return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
 }
 
 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
@@ -11156,16 +11192,19 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
        int i, cnt, err;
 
        cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
+       if (cnt < 0)
+               return libbpf_err_errno(cnt);
+
        for (i = 0; i < cnt; i++) {
                struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
 
                err = perf_buffer__process_records(pb, cpu_buf);
                if (err) {
                        pr_warn("error while processing records: %d\n", err);
-                       return err;
+                       return libbpf_err(err);
                }
        }
-       return cnt < 0 ? -errno : cnt;
+       return cnt;
 }
 
 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
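
With the change above, perf_buffer__poll() reports epoll_wait() failures up front through libbpf_err_errno() and otherwise returns the event count directly. A polling-loop sketch under that contract; the perf_buffer setup and the exiting flag (assumed to be set from a signal handler) are not shown:

#include <bpf/libbpf.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t exiting; /* assumed: set to 1 by a SIGINT handler */

static int poll_loop(struct perf_buffer *pb)
{
	int n;

	while (!exiting) {
		n = perf_buffer__poll(pb, 100 /* timeout, ms */);
		if (n < 0 && errno == EINTR)
			continue; /* interrupted by a signal, retry */
		if (n < 0) {
			fprintf(stderr, "polling failed: %d\n", n);
			return n; /* epoll or record-processing error */
		}
		/* n per-CPU buffers had data and were handed to the callback */
	}
	return 0;
}
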
@@ -11186,11 +11225,11 @@ int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
        struct perf_cpu_buf *cpu_buf;
 
        if (buf_idx >= pb->cpu_cnt)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        cpu_buf = pb->cpu_bufs[buf_idx];
        if (!cpu_buf)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        return cpu_buf->fd;
 }
@@ -11208,11 +11247,11 @@ int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
        struct perf_cpu_buf *cpu_buf;
 
        if (buf_idx >= pb->cpu_cnt)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        cpu_buf = pb->cpu_bufs[buf_idx];
        if (!cpu_buf)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
 
        return perf_buffer__process_records(pb, cpu_buf);
 }
@@ -11230,7 +11269,7 @@ int perf_buffer__consume(struct perf_buffer *pb)
                err = perf_buffer__process_records(pb, cpu_buf);
                if (err) {
                        pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
-                       return err;
+                       return libbpf_err(err);
                }
        }
        return 0;
@@ -11342,13 +11381,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
        void *ptr;
 
        if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
-               return ERR_PTR(-EINVAL);
+               return libbpf_err_ptr(-EINVAL);
 
        /* step 1: get array dimensions */
        err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
        if (err) {
                pr_debug("can't get prog info: %s", strerror(errno));
-               return ERR_PTR(-EFAULT);
+               return libbpf_err_ptr(-EFAULT);
        }
 
        /* step 2: calculate total size of all arrays */
@@ -11380,7 +11419,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
        data_len = roundup(data_len, sizeof(__u64));
        info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
        if (!info_linear)
-               return ERR_PTR(-ENOMEM);
+               return libbpf_err_ptr(-ENOMEM);
 
        /* step 4: fill data to info_linear->info */
        info_linear->arrays = arrays;
@@ -11412,7 +11451,7 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)
        if (err) {
                pr_debug("can't get prog info: %s", strerror(errno));
                free(info_linear);
-               return ERR_PTR(-EFAULT);
+               return libbpf_err_ptr(-EFAULT);
        }
 
        /* step 6: verify the data */
@@ -11491,26 +11530,26 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
        int btf_obj_fd = 0, btf_id = 0, err;
 
        if (!prog || attach_prog_fd < 0 || !attach_func_name)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (prog->obj->loaded)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (attach_prog_fd) {
                btf_id = libbpf_find_prog_btf_id(attach_func_name,
                                                 attach_prog_fd);
                if (btf_id < 0)
-                       return btf_id;
+                       return libbpf_err(btf_id);
        } else {
                /* load btf_vmlinux, if not yet */
                err = bpf_object__load_vmlinux_btf(prog->obj, true);
                if (err)
-                       return err;
+                       return libbpf_err(err);
                err = find_kernel_btf_id(prog->obj, attach_func_name,
                                         prog->expected_attach_type,
                                         &btf_obj_fd, &btf_id);
                if (err)
-                       return err;
+                       return libbpf_err(err);
        }
 
        prog->attach_btf_id = btf_id;
@@ -11609,7 +11648,7 @@ int libbpf_num_possible_cpus(void)
 
        err = parse_cpu_mask_file(fcpu, &mask, &n);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        tmp_cpus = 0;
        for (i = 0; i < n; i++) {
@@ -11629,7 +11668,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
                .object_name = s->name,
        );
        struct bpf_object *obj;
-       int i;
+       int i, err;
 
        /* Attempt to preserve opts->object_name, unless overridden by user
         * explicitly. Overwriting object name for skeletons is discouraged,
@@ -11644,10 +11683,11 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
        }
 
        obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
-       if (IS_ERR(obj)) {
-               pr_warn("failed to initialize skeleton BPF object '%s': %ld\n",
-                       s->name, PTR_ERR(obj));
-               return PTR_ERR(obj);
+       err = libbpf_get_error(obj);
+       if (err) {
+               pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
+                       s->name, err);
+               return libbpf_err(err);
        }
 
        *s->obj = obj;
@@ -11660,7 +11700,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
                *map = bpf_object__find_map_by_name(obj, name);
                if (!*map) {
                        pr_warn("failed to find skeleton map '%s'\n", name);
-                       return -ESRCH;
+                       return libbpf_err(-ESRCH);
                }
 
                /* externs shouldn't be pre-setup from user code */
@@ -11675,7 +11715,7 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
                *prog = bpf_object__find_program_by_name(obj, name);
                if (!*prog) {
                        pr_warn("failed to find skeleton program '%s'\n", name);
-                       return -ESRCH;
+                       return libbpf_err(-ESRCH);
                }
        }
 
@@ -11689,7 +11729,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
        err = bpf_object__load(*s->obj);
        if (err) {
                pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
-               return err;
+               return libbpf_err(err);
        }
 
        for (i = 0; i < s->map_cnt; i++) {
@@ -11728,7 +11768,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
                        *mmaped = NULL;
                        pr_warn("failed to re-mmap() map '%s': %d\n",
                                 bpf_map__name(map), err);
-                       return err;
+                       return libbpf_err(err);
                }
        }
 
@@ -11737,7 +11777,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
 
 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 {
-       int i;
+       int i, err;
 
        for (i = 0; i < s->prog_cnt; i++) {
                struct bpf_program *prog = *s->progs[i].prog;
@@ -11752,10 +11792,11 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
                        continue;
 
                *link = sec_def->attach_fn(sec_def, prog);
-               if (IS_ERR(*link)) {
-                       pr_warn("failed to auto-attach program '%s': %ld\n",
-                               bpf_program__name(prog), PTR_ERR(*link));
-                       return PTR_ERR(*link);
+               err = libbpf_get_error(*link);
+               if (err) {
+                       pr_warn("failed to auto-attach program '%s': %d\n",
+                               bpf_program__name(prog), err);
+                       return libbpf_err(err);
                }
        }
 
index d985235..6e61342 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/bpf.h>
 
 #include "libbpf_common.h"
+#include "libbpf_legacy.h"
 
 #ifdef __cplusplus
 extern "C" {
index 2abef6f..944c99d 100644 (file)
@@ -359,9 +359,7 @@ LIBBPF_0.4.0 {
                bpf_linker__finalize;
                bpf_linker__free;
                bpf_linker__new;
-               bpf_map__initial_value;
                bpf_map__inner_map;
-               bpf_object__gen_loader;
                bpf_object__set_kversion;
                bpf_tc_attach;
                bpf_tc_detach;
@@ -369,3 +367,11 @@ LIBBPF_0.4.0 {
                bpf_tc_hook_destroy;
                bpf_tc_query;
 } LIBBPF_0.3.0;
+
+LIBBPF_0.5.0 {
+       global:
+               bpf_map__initial_value;
+               bpf_map_lookup_and_delete_elem_flags;
+               bpf_object__gen_loader;
+               libbpf_set_strict_mode;
+} LIBBPF_0.4.0;
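
Among the symbols newly exported in LIBBPF_0.5.0, bpf_map_lookup_and_delete_elem_flags() is the userspace counterpart of the kernel-side extension of BPF_MAP_LOOKUP_AND_DELETE_ELEM to the hash map types. A hedged sketch of popping one entry, assuming a hash map with __u32 keys and __u64 values:

#include <bpf/bpf.h>
#include <stdio.h>

int pop_entry(int map_fd, __u32 key)
{
	__u64 value;
	int err;

	/* atomically read and remove the element; 0 == no special flags */
	err = bpf_map_lookup_and_delete_elem_flags(map_fd, &key, &value, 0);
	if (err)
		return err;

	printf("key %u held value %llu\n", key, (unsigned long long)value);
	return 0;
}
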
index 0afb51f..96f67a7 100644 (file)
@@ -12,6 +12,7 @@
 #include <string.h>
 
 #include "libbpf.h"
+#include "libbpf_internal.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
@@ -39,7 +40,7 @@ static const char *libbpf_strerror_table[NR_ERRNO] = {
 int libbpf_strerror(int err, char *buf, size_t size)
 {
        if (!buf || !size)
-               return -1;
+               return libbpf_err(-EINVAL);
 
        err = err > 0 ? err : -err;
 
@@ -48,7 +49,7 @@ int libbpf_strerror(int err, char *buf, size_t size)
 
                ret = strerror_r(err, buf, size);
                buf[size - 1] = '\0';
-               return ret;
+               return libbpf_err_errno(ret);
        }
 
        if (err < __LIBBPF_ERRNO__END) {
@@ -62,5 +63,5 @@ int libbpf_strerror(int err, char *buf, size_t size)
 
        snprintf(buf, size, "Unknown libbpf error %d", err);
        buf[size - 1] = '\0';
-       return -1;
+       return libbpf_err(-ENOENT);
 }
index a2cc297..016ca7c 100644 (file)
@@ -11,6 +11,9 @@
 
 #include <stdlib.h>
 #include <limits.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "libbpf_legacy.h"
 
 /* make sure libbpf doesn't use kernel-only integer typedefs */
 #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
 #ifndef R_BPF_64_64
 #define R_BPF_64_64 1
 #endif
+#ifndef R_BPF_64_ABS64
+#define R_BPF_64_ABS64 2
+#endif
+#ifndef R_BPF_64_ABS32
+#define R_BPF_64_ABS32 3
+#endif
 #ifndef R_BPF_64_32
 #define R_BPF_64_32 10
 #endif
@@ -435,4 +444,54 @@ int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ct
 int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx);
 int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx);
 
+extern enum libbpf_strict_mode libbpf_mode;
+
+/* handle direct returned errors */
+static inline int libbpf_err(int ret)
+{
+       if (ret < 0)
+               errno = -ret;
+       return ret;
+}
+
+/* handle errno-based (e.g., syscall or libc) errors according to libbpf's
+ * strict mode settings
+ */
+static inline int libbpf_err_errno(int ret)
+{
+       if (libbpf_mode & LIBBPF_STRICT_DIRECT_ERRS)
+               /* errno is already assumed to be set on error */
+               return ret < 0 ? -errno : ret;
+
+       /* legacy: on error return -1 directly and don't touch errno */
+       return ret;
+}
+
+/* handle error for pointer-returning APIs, err is assumed to be < 0 always */
+static inline void *libbpf_err_ptr(int err)
+{
+       /* set errno on error; this doesn't break anything */
+       errno = -err;
+
+       if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+               return NULL;
+
+       /* legacy: encode err as ptr */
+       return ERR_PTR(err);
+}
+
+/* handle pointer-returning APIs' error handling */
+static inline void *libbpf_ptr(void *ret)
+{
+       /* set errno on error; this doesn't break anything */
+       if (IS_ERR(ret))
+               errno = -PTR_ERR(ret);
+
+       if (libbpf_mode & LIBBPF_STRICT_CLEAN_PTRS)
+               return IS_ERR(ret) ? NULL : ret;
+
+       /* legacy: pass-through original pointer */
+       return ret;
+}
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
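
Taken together, these helpers give each kind of API a single wrapper: libbpf_err() for computed error codes, libbpf_err_errno() for syscall-style -1/errno results, and libbpf_err_ptr()/libbpf_ptr() for pointer returns. A sketch of how code inside libbpf composes them; the struct and both functions are illustrative, not real libbpf entry points, and assume this internal header is in scope:

#include <errno.h>
#include <stdlib.h>

struct ctx { int val; }; /* illustrative */

/* computed error: errno gets set either way, return value per strict mode */
static int example_set_val(struct ctx *c, int val)
{
	if (!c)
		return libbpf_err(-EINVAL);
	c->val = val;
	return 0;
}

/* pointer return: NULL or ERR_PTR(-ENOMEM) depending on LIBBPF_STRICT_CLEAN_PTRS */
static struct ctx *example_new(void)
{
	struct ctx *c = calloc(1, sizeof(*c));

	if (!c)
		return libbpf_err_ptr(-ENOMEM);
	return c;
}
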
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
new file mode 100644 (file)
index 0000000..df0d03d
--- /dev/null
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Libbpf legacy APIs (either discouraged or deprecated, as mentioned in [0])
+ *
+ *   [0] https://docs.google.com/document/d/1UyjTZuPFWiPFyKk1tV5an11_iaRuec6U-ZESZ54nNTY
+ *
+ * Copyright (C) 2021 Facebook
+ */
+#ifndef __LIBBPF_LEGACY_BPF_H
+#define __LIBBPF_LEGACY_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include "libbpf_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum libbpf_strict_mode {
+       /* Turn on all supported strict features of libbpf to simulate libbpf
+        * v1.0 behavior.
+        * This will be the default behavior in libbpf v1.0.
+        */
+       LIBBPF_STRICT_ALL = 0xffffffff,
+
+       /*
+        * Disable any libbpf 1.0 behaviors. This is the default before libbpf
+        * v1.0. It won't be supported anymore in v1.0; please update your
+        * code so that it handles LIBBPF_STRICT_ALL mode before libbpf v1.0.
+        */
+       LIBBPF_STRICT_NONE = 0x00,
+       /*
+        * Return NULL pointers on error, not ERR_PTR(err).
+        * Additionally, libbpf also always sets errno to corresponding Exx
+        * (positive) error code.
+        */
+       LIBBPF_STRICT_CLEAN_PTRS = 0x01,
+       /*
+        * Return actual error codes from low-level APIs directly, not just -1.
+        * Additionally, libbpf also always sets errno to corresponding Exx
+        * (positive) error code.
+        */
+       LIBBPF_STRICT_DIRECT_ERRS = 0x02,
+
+       __LIBBPF_STRICT_LAST,
+};
+
+LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __LIBBPF_LEGACY_BPF_H */
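
An application can opt into the v1.0 semantics ahead of time; with LIBBPF_STRICT_CLEAN_PTRS in effect, pointer-returning APIs hand back plain NULL with errno set, so no IS_ERR() decoding is needed. A hedged sketch (the object file name is an assumption):

#include <bpf/libbpf.h>
#include <errno.h>
#include <stdio.h>

int main(void)
{
	struct bpf_object *obj;

	libbpf_set_strict_mode(LIBBPF_STRICT_ALL); /* simulate libbpf v1.0 */

	obj = bpf_object__open("prog.bpf.o"); /* hypothetical object file */
	if (!obj) { /* plain NULL on error, errno carries the Exx code */
		fprintf(stderr, "failed to open object: %d\n", errno);
		return 1;
	}

	bpf_object__close(obj);
	return 0;
}
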
index b594a88..10911a8 100644 (file)
@@ -220,16 +220,16 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
        int err;
 
        if (!OPTS_VALID(opts, bpf_linker_opts))
-               return NULL;
+               return errno = EINVAL, NULL;
 
        if (elf_version(EV_CURRENT) == EV_NONE) {
                pr_warn_elf("libelf initialization failed");
-               return NULL;
+               return errno = EINVAL, NULL;
        }
 
        linker = calloc(1, sizeof(*linker));
        if (!linker)
-               return NULL;
+               return errno = ENOMEM, NULL;
 
        linker->fd = -1;
 
@@ -241,7 +241,7 @@ struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts
 
 err_out:
        bpf_linker__free(linker);
-       return NULL;
+       return errno = -err, NULL;
 }
 
 static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name)
@@ -444,10 +444,10 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
        int err = 0;
 
        if (!OPTS_VALID(opts, bpf_linker_file_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (!linker->elf)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = err ?: linker_load_obj_file(linker, filename, opts, &obj);
        err = err ?: linker_append_sec_data(linker, &obj);
@@ -467,7 +467,7 @@ int bpf_linker__add_file(struct bpf_linker *linker, const char *filename,
        if (obj.fd >= 0)
                close(obj.fd);
 
-       return err;
+       return libbpf_err(err);
 }
 
 static bool is_dwarf_sec_name(const char *name)
@@ -892,7 +892,8 @@ static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *se
                size_t sym_idx = ELF64_R_SYM(relo->r_info);
                size_t sym_type = ELF64_R_TYPE(relo->r_info);
 
-               if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+               if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32 &&
+                   sym_type != R_BPF_64_ABS64 && sym_type != R_BPF_64_ABS32) {
                        pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
                                i, sec->sec_idx, sym_type, obj->filename);
                        return -EINVAL;
@@ -2547,11 +2548,11 @@ int bpf_linker__finalize(struct bpf_linker *linker)
        int err, i;
 
        if (!linker->elf)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        err = finalize_btf(linker);
        if (err)
-               return err;
+               return libbpf_err(err);
 
        /* Finalize strings */
        strs_sz = strset__data_size(linker->strtab_strs);
@@ -2583,14 +2584,14 @@ int bpf_linker__finalize(struct bpf_linker *linker)
        if (elf_update(linker->elf, ELF_C_NULL) < 0) {
                err = -errno;
                pr_warn_elf("failed to finalize ELF layout");
-               return err;
+               return libbpf_err(err);
        }
 
        /* Write out final ELF contents */
        if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
                err = -errno;
                pr_warn_elf("failed to write ELF contents");
-               return err;
+               return libbpf_err(err);
        }
 
        elf_end(linker->elf);
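
With these changes the linker APIs follow the same contract: bpf_linker__new()
returns NULL with errno set, and the int-returning entry points go through
libbpf_err(). A rough usage sketch (file names are placeholders):

    #include <errno.h>
    #include <stdio.h>
    #include <string.h>
    #include <bpf/libbpf.h>

    static int link_one(const char *out_path, const char *in_path)
    {
            struct bpf_linker *linker;
            int err;

            linker = bpf_linker__new(out_path, NULL);
            if (!linker) /* errno holds EINVAL, ENOMEM, ... */
                    return -errno;

            err = bpf_linker__add_file(linker, in_path, NULL);
            err = err ?: bpf_linker__finalize(linker);
            if (err < 0) /* -Exx in strict mode, -1 + errno otherwise */
                    fprintf(stderr, "link failed: %s\n", strerror(errno));

            bpf_linker__free(linker);
            return err;
    }
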
index 4744458..cf9381f 100644 (file)
@@ -225,22 +225,26 @@ static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
 int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
                             const struct bpf_xdp_set_link_opts *opts)
 {
-       int old_fd = -1;
+       int old_fd = -1, ret;
 
        if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        if (OPTS_HAS(opts, old_fd)) {
                old_fd = OPTS_GET(opts, old_fd, -1);
                flags |= XDP_FLAGS_REPLACE;
        }
 
-       return __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
+       ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, old_fd, flags);
+       return libbpf_err(ret);
 }
 
 int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
 {
-       return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+       int ret;
+
+       ret = __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+       return libbpf_err(ret);
 }
 
 static int __dump_link_nlmsg(struct nlmsghdr *nlh,
@@ -321,13 +325,13 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
        };
 
        if (flags & ~XDP_FLAGS_MASK || !info_size)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        /* Check whether the single {HW,DRV,SKB} mode is set */
        flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE);
        mask = flags - 1;
        if (flags && flags & mask)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        xdp_id.ifindex = ifindex;
        xdp_id.flags = flags;
@@ -341,7 +345,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
                memset((void *) info + sz, 0, info_size - sz);
        }
 
-       return ret;
+       return libbpf_err(ret);
 }
 
 static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
@@ -369,7 +373,7 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
        if (!ret)
                *prog_id = get_xdp_id(&info, flags);
 
-       return ret;
+       return libbpf_err(ret);
 }
 
 typedef int (*qdisc_config_t)(struct nlmsghdr *nh, struct tcmsg *t,
@@ -453,7 +457,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
 
 static int tc_qdisc_create_excl(struct bpf_tc_hook *hook)
 {
-       return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE);
+       return tc_qdisc_modify(hook, RTM_NEWQDISC, NLM_F_CREATE | NLM_F_EXCL);
 }
 
 static int tc_qdisc_delete(struct bpf_tc_hook *hook)
@@ -463,11 +467,14 @@ static int tc_qdisc_delete(struct bpf_tc_hook *hook)
 
 int bpf_tc_hook_create(struct bpf_tc_hook *hook)
 {
+       int ret;
+
        if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
            OPTS_GET(hook, ifindex, 0) <= 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
-       return tc_qdisc_create_excl(hook);
+       ret = tc_qdisc_create_excl(hook);
+       return libbpf_err(ret);
 }
 
 static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
@@ -478,18 +485,18 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
 {
        if (!hook || !OPTS_VALID(hook, bpf_tc_hook) ||
            OPTS_GET(hook, ifindex, 0) <= 0)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        switch (OPTS_GET(hook, attach_point, 0)) {
        case BPF_TC_INGRESS:
        case BPF_TC_EGRESS:
-               return __bpf_tc_detach(hook, NULL, true);
+               return libbpf_err(__bpf_tc_detach(hook, NULL, true));
        case BPF_TC_INGRESS | BPF_TC_EGRESS:
-               return tc_qdisc_delete(hook);
+               return libbpf_err(tc_qdisc_delete(hook));
        case BPF_TC_CUSTOM:
-               return -EOPNOTSUPP;
+               return libbpf_err(-EOPNOTSUPP);
        default:
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 }
 
@@ -574,7 +581,7 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
        if (!hook || !opts ||
            !OPTS_VALID(hook, bpf_tc_hook) ||
            !OPTS_VALID(opts, bpf_tc_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        ifindex      = OPTS_GET(hook, ifindex, 0);
        parent       = OPTS_GET(hook, parent, 0);
@@ -587,11 +594,11 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
        flags        = OPTS_GET(opts, flags, 0);
 
        if (ifindex <= 0 || !prog_fd || prog_id)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (priority > UINT16_MAX)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (flags & ~BPF_TC_F_REPLACE)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        flags = (flags & BPF_TC_F_REPLACE) ? NLM_F_REPLACE : NLM_F_EXCL;
        protocol = ETH_P_ALL;
@@ -608,32 +615,32 @@ int bpf_tc_attach(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 
        ret = tc_get_tcm_parent(attach_point, &parent);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        req.tc.tcm_parent = parent;
 
        ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        nla = nlattr_begin_nested(&req.nh, sizeof(req), TCA_OPTIONS);
        if (!nla)
-               return -EMSGSIZE;
+               return libbpf_err(-EMSGSIZE);
        ret = tc_add_fd_and_name(&req.nh, sizeof(req), prog_fd);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        bpf_flags = TCA_BPF_FLAG_ACT_DIRECT;
        ret = nlattr_add(&req.nh, sizeof(req), TCA_BPF_FLAGS, &bpf_flags,
                         sizeof(bpf_flags));
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        nlattr_end_nested(&req.nh, nla);
 
        info.opts = opts;
 
        ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        if (!info.processed)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        return ret;
 }
 
@@ -668,8 +675,6 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
                return -EINVAL;
        if (priority > UINT16_MAX)
                return -EINVAL;
-       if (flags & ~BPF_TC_F_REPLACE)
-               return -EINVAL;
        if (!flush) {
                if (!handle || !priority)
                        return -EINVAL;
@@ -708,7 +713,13 @@ static int __bpf_tc_detach(const struct bpf_tc_hook *hook,
 int bpf_tc_detach(const struct bpf_tc_hook *hook,
                  const struct bpf_tc_opts *opts)
 {
-       return !opts ? -EINVAL : __bpf_tc_detach(hook, opts, false);
+       int ret;
+
+       if (!opts)
+               return libbpf_err(-EINVAL);
+
+       ret = __bpf_tc_detach(hook, opts, false);
+       return libbpf_err(ret);
 }
 
 int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
@@ -725,7 +736,7 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
        if (!hook || !opts ||
            !OPTS_VALID(hook, bpf_tc_hook) ||
            !OPTS_VALID(opts, bpf_tc_opts))
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        ifindex      = OPTS_GET(hook, ifindex, 0);
        parent       = OPTS_GET(hook, parent, 0);
@@ -739,9 +750,9 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 
        if (ifindex <= 0 || flags || prog_fd || prog_id ||
            !handle || !priority)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        if (priority > UINT16_MAX)
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
 
        protocol = ETH_P_ALL;
 
@@ -756,19 +767,19 @@ int bpf_tc_query(const struct bpf_tc_hook *hook, struct bpf_tc_opts *opts)
 
        ret = tc_get_tcm_parent(attach_point, &parent);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        req.tc.tcm_parent = parent;
 
        ret = nlattr_add(&req.nh, sizeof(req), TCA_KIND, "bpf", sizeof("bpf"));
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
 
        info.opts = opts;
 
        ret = libbpf_netlink_send_recv(&req.nh, get_tc_info, NULL, &info);
        if (ret < 0)
-               return ret;
+               return libbpf_err(ret);
        if (!info.processed)
-               return -ENOENT;
+               return libbpf_err(-ENOENT);
        return ret;
 }
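
Besides the error-reporting conversion, note the NLM_F_EXCL fix above:
bpf_tc_hook_create() now reports an already-installed clsact qdisc as a
failure instead of silently reusing it. A rough sketch of the resulting usage
pattern, assuming LIBBPF_STRICT_DIRECT_ERRS is enabled so errors come back as
-Exx codes:

    #include <errno.h>
    #include <bpf/libbpf.h>

    /* ifindex and prog_fd are assumed to be supplied by the caller */
    static int attach_tc_ingress(int ifindex, int prog_fd)
    {
            DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
                                .attach_point = BPF_TC_INGRESS);
            DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1,
                                .priority = 1, .prog_fd = prog_fd);
            int err;

            /* -EEXIST now means the qdisc was already set up, which
             * callers typically treat as success
             */
            err = bpf_tc_hook_create(&hook);
            if (err && err != -EEXIST)
                    return err;

            return bpf_tc_attach(&hook, &opts);
    }
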
index 1d80ad4..8bc117b 100644 (file)
@@ -69,23 +69,23 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
                err = -errno;
                pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
                        map_fd, err);
-               return err;
+               return libbpf_err(err);
        }
 
        if (info.type != BPF_MAP_TYPE_RINGBUF) {
                pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
                        map_fd);
-               return -EINVAL;
+               return libbpf_err(-EINVAL);
        }
 
        tmp = libbpf_reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
        if (!tmp)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
        rb->rings = tmp;
 
        tmp = libbpf_reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
        if (!tmp)
-               return -ENOMEM;
+               return libbpf_err(-ENOMEM);
        rb->events = tmp;
 
        r = &rb->rings[rb->ring_cnt];
@@ -103,7 +103,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
                err = -errno;
                pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
                        map_fd, err);
-               return err;
+               return libbpf_err(err);
        }
        r->consumer_pos = tmp;
 
@@ -118,7 +118,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
                ringbuf_unmap_ring(rb, r);
                pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
                        map_fd, err);
-               return err;
+               return libbpf_err(err);
        }
        r->producer_pos = tmp;
        r->data = tmp + rb->page_size;
@@ -133,7 +133,7 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd,
                ringbuf_unmap_ring(rb, r);
                pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
                        map_fd, err);
-               return err;
+               return libbpf_err(err);
        }
 
        rb->ring_cnt++;
@@ -165,11 +165,11 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
        int err;
 
        if (!OPTS_VALID(opts, ring_buffer_opts))
-               return NULL;
+               return errno = EINVAL, NULL;
 
        rb = calloc(1, sizeof(*rb));
        if (!rb)
-               return NULL;
+               return errno = ENOMEM, NULL;
 
        rb->page_size = getpagesize();
 
@@ -188,7 +188,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
 
 err_out:
        ring_buffer__free(rb);
-       return NULL;
+       return errno = -err, NULL;
 }
 
 static inline int roundup_len(__u32 len)
@@ -260,7 +260,7 @@ int ring_buffer__consume(struct ring_buffer *rb)
 
                err = ringbuf_process_ring(ring);
                if (err < 0)
-                       return err;
+                       return libbpf_err(err);
                res += err;
        }
        if (res > INT_MAX)
@@ -279,7 +279,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 
        cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
        if (cnt < 0)
-               return -errno;
+               return libbpf_err(-errno);
 
        for (i = 0; i < cnt; i++) {
                __u32 ring_id = rb->events[i].data.fd;
@@ -287,7 +287,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
 
                err = ringbuf_process_ring(ring);
                if (err < 0)
-                       return err;
+                       return libbpf_err(err);
                res += err;
        }
        if (res > INT_MAX)
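
The ring buffer APIs follow suit: ring_buffer__new() returns NULL with errno
set, and the polling/consuming entry points report errors via libbpf_err().
A minimal sketch:

    #include <errno.h>
    #include <stddef.h>
    #include <bpf/libbpf.h>

    static int on_sample(void *ctx, void *data, size_t size)
    {
            return 0; /* consumed; keep going */
    }

    /* map_fd is assumed to be a BPF_MAP_TYPE_RINGBUF map fd */
    static int drain(int map_fd)
    {
            struct ring_buffer *rb;
            int err;

            rb = ring_buffer__new(map_fd, on_sample, NULL, NULL);
            if (!rb) /* EINVAL, ENOMEM, ... in errno */
                    return -errno;

            err = ring_buffer__poll(rb, 100 /* timeout_ms */);
            ring_buffer__free(rb);
            return err;
    }
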
index a030aa4..addcfd8 100644 (file)
@@ -10,6 +10,7 @@ FEATURE-DUMP.libbpf
 fixdep
 test_dev_cgroup
 /test_progs*
+!test_progs.h
 test_verifier_log
 feature
 test_sock
@@ -37,4 +38,6 @@ test_cpp
 /runqslower
 /bench
 *.ko
+*.tmp
 xdpxceiver
+xdp_redirect_multi
index 525e4b3..f405b20 100644 (file)
@@ -54,6 +54,7 @@ TEST_FILES = xsk_prereqs.sh \
 # Order correspond to 'make run_tests' order
 TEST_PROGS := test_kmod.sh \
        test_xdp_redirect.sh \
+       test_xdp_redirect_multi.sh \
        test_xdp_meta.sh \
        test_xdp_veth.sh \
        test_offload.py \
@@ -84,7 +85,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
        flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
        test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
-       xdpxceiver
+       xdpxceiver xdp_redirect_multi
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 
index ccf2600..eb6a4fe 100644 (file)
@@ -52,7 +52,8 @@ $(OUTPUT)%.$2: $(OUTPUT)%.rst
 ifndef RST2MAN_DEP
        $$(error "rst2man not found, but required to generate man pages")
 endif
-       $$(QUIET_GEN)rst2man $$< > $$@
+       $$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp
+       $$(QUIET_GEN)mv $$@.tmp $$@
 
 docs-clean-$1:
        $$(call QUIET_CLEAN, eBPF_$1-manpage)
index 3353778..8deec1c 100644 (file)
@@ -202,3 +202,22 @@ generate valid BTF information for weak variables. Please make sure you use
 Clang that contains the fix.
 
 __ https://reviews.llvm.org/D100362
+
+Clang relocation changes
+========================
+
+The Clang 13 patch `clang reloc patch`_ made some changes to relocations:
+existing relocation types are split into more types, and each new type
+corresponds to exactly one way of resolving the relocation.
+See `kernel llvm reloc`_ for more explanation and some examples.
+Using clang 13 together with an older libbpf that has static linker
+support will result in a failure like::
+
+  libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o
+
+Here, ``type 2`` refers to the new relocation type ``R_BPF_64_ABS64``.
+To fix this issue, use a newer libbpf.
+
+.. Links
+.. _clang reloc patch: https://reviews.llvm.org/D102712
+.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
index 332ed2f..6ea15b9 100644 (file)
@@ -43,6 +43,7 @@ void setup_libbpf()
 {
        int err;
 
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
        libbpf_set_print(libbpf_print_fn);
 
        err = bump_memlock_rlimit();
index a967674..c7ec114 100644 (file)
@@ -65,7 +65,7 @@ static void attach_bpf(struct bpf_program *prog)
        struct bpf_link *link;
 
        link = bpf_program__attach(prog);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program!\n");
                exit(1);
        }
index bde6c9d..d167bff 100644 (file)
@@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup()
        }
 
        link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program!\n");
                exit(1);
        }
@@ -271,7 +271,7 @@ static void ringbuf_custom_setup()
        }
 
        link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program\n");
                exit(1);
        }
@@ -430,7 +430,7 @@ static void perfbuf_libbpf_setup()
        }
 
        link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program\n");
                exit(1);
        }
index 2a0b6c9..f41a491 100644 (file)
@@ -60,7 +60,7 @@ static void attach_bpf(struct bpf_program *prog)
        struct bpf_link *link;
 
        link = bpf_program__attach(prog);
-       if (IS_ERR(link)) {
+       if (!link) {
                fprintf(stderr, "failed to attach program!\n");
                exit(1);
        }
index 9dc4e3d..ec11e20 100644 (file)
@@ -85,16 +85,14 @@ void test_attach_probe(void)
        kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
                                                 false /* retprobe */,
                                                 SYS_NANOSLEEP_KPROBE_NAME);
-       if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
-                 "err %ld\n", PTR_ERR(kprobe_link)))
+       if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
                goto cleanup;
        skel->links.handle_kprobe = kprobe_link;
 
        kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
                                                    true /* retprobe */,
                                                    SYS_NANOSLEEP_KPROBE_NAME);
-       if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
-                 "err %ld\n", PTR_ERR(kretprobe_link)))
+       if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
                goto cleanup;
        skel->links.handle_kretprobe = kretprobe_link;
 
@@ -103,8 +101,7 @@ void test_attach_probe(void)
                                                 0 /* self pid */,
                                                 "/proc/self/exe",
                                                 uprobe_offset);
-       if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
-                 "err %ld\n", PTR_ERR(uprobe_link)))
+       if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
                goto cleanup;
        skel->links.handle_uprobe = uprobe_link;
 
@@ -113,8 +110,7 @@ void test_attach_probe(void)
                                                    -1 /* any pid */,
                                                    "/proc/self/exe",
                                                    uprobe_offset);
-       if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
-                 "err %ld\n", PTR_ERR(uretprobe_link)))
+       if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
                goto cleanup;
        skel->links.handle_uretprobe = uretprobe_link;
 
index 2d3590c..1f1aade 100644 (file)
@@ -47,7 +47,7 @@ static void do_dummy_read(struct bpf_program *prog)
        int iter_fd, len;
 
        link = bpf_program__attach_iter(prog, NULL);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                return;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -201,7 +201,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
        int ret = 0;
 
        link = bpf_program__attach_iter(prog, NULL);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                return ret;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -396,7 +396,7 @@ static void test_file_iter(void)
                return;
 
        link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        /* unlink this path if it exists. */
@@ -502,7 +502,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
        skel->bss->map2_id = map_info.id;
 
        link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto free_map2;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -607,14 +607,12 @@ static void test_bpf_hash_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-       if (CHECK(!IS_ERR(link), "attach_iter",
-                 "attach_iter for hashmap2 unexpected succeeded\n"))
+       if (!ASSERT_ERR_PTR(link, "attach_iter"))
                goto out;
 
        linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-       if (CHECK(!IS_ERR(link), "attach_iter",
-                 "attach_iter for hashmap3 unexpected succeeded\n"))
+       if (!ASSERT_ERR_PTR(link, "attach_iter"))
                goto out;
 
        /* hashmap1 should be good, update map values here */
@@ -636,7 +634,7 @@ static void test_bpf_hash_map(void)
 
        linfo.map.map_fd = map_fd;
        link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -727,7 +725,7 @@ static void test_bpf_percpu_hash_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -798,7 +796,7 @@ static void test_bpf_array_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -894,7 +892,7 @@ static void test_bpf_percpu_array_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -957,7 +955,7 @@ static void test_bpf_sk_storage_delete(void)
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
                                        &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1075,7 +1073,7 @@ static void test_bpf_sk_storage_map(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -1128,7 +1126,7 @@ static void test_rdonly_buf_out_of_bound(void)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
-       if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
+       if (!ASSERT_ERR_PTR(link, "attach_iter"))
                bpf_link__destroy(link);
 
        bpf_iter_test_kern5__destroy(skel);
@@ -1186,8 +1184,7 @@ static void test_task_vma(void)
        skel->links.proc_maps = bpf_program__attach_iter(
                skel->progs.proc_maps, NULL);
 
-       if (CHECK(IS_ERR(skel->links.proc_maps), "bpf_program__attach_iter",
-                 "attach iterator failed\n")) {
+       if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
                skel->links.proc_maps = NULL;
                goto out;
        }
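
The ASSERT_OK_PTR()/ASSERT_ERR_PTR() helpers used in these conversions come
from test_progs.h and classify a pointer through libbpf_get_error(), so they
catch both NULL (strict mode) and ERR_PTR-encoded (legacy) failures. A
simplified, hypothetical equivalent of the OK variant, just to illustrate the
shape:

    /* not the real macro; see tools/testing/selftests/bpf/test_progs.h */
    #define ASSERT_OK_PTR(ptr, name) ({                              \
            const void *__p = (ptr);                                 \
            long __err = libbpf_get_error(__p);                      \
            if (__err)                                               \
                    fprintf(stderr, "%s: unexpected error %ld\n",    \
                            (name), __err);                          \
            __err == 0;                                              \
    })
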
index e25917f..efe1e97 100644 (file)
@@ -82,7 +82,7 @@ static void *server(void *arg)
              bytes, total_bytes, nr_sent, errno);
 
 done:
-       if (fd != -1)
+       if (fd >= 0)
                close(fd);
        if (err) {
                WRITE_ONCE(stop, 1);
@@ -191,8 +191,7 @@ static void test_cubic(void)
                return;
 
        link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
-       if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
-                 PTR_ERR(link))) {
+       if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
                bpf_cubic__destroy(cubic_skel);
                return;
        }
@@ -213,8 +212,7 @@ static void test_dctcp(void)
                return;
 
        link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
-       if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
-                 PTR_ERR(link))) {
+       if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
                bpf_dctcp__destroy(dctcp_skel);
                return;
        }
index 0457ae3..857e3f2 100644 (file)
@@ -3811,7 +3811,7 @@ static void do_test_raw(unsigned int test_num)
                              always_log);
        free(raw_btf);
 
-       err = ((btf_fd == -1) != test->btf_load_err);
+       err = ((btf_fd < 0) != test->btf_load_err);
        if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
                  btf_fd, test->btf_load_err) ||
            CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -3820,7 +3820,7 @@ static void do_test_raw(unsigned int test_num)
                goto done;
        }
 
-       if (err || btf_fd == -1)
+       if (err || btf_fd < 0)
                goto done;
 
        create_attr.name = test->map_name;
@@ -3834,16 +3834,16 @@ static void do_test_raw(unsigned int test_num)
 
        map_fd = bpf_create_map_xattr(&create_attr);
 
-       err = ((map_fd == -1) != test->map_create_err);
+       err = ((map_fd < 0) != test->map_create_err);
        CHECK(err, "map_fd:%d test->map_create_err:%u",
              map_fd, test->map_create_err);
 
 done:
        if (*btf_log_buf && (err || always_log))
                fprintf(stderr, "\n%s", btf_log_buf);
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
-       if (map_fd != -1)
+       if (map_fd >= 0)
                close(map_fd);
 }
 
@@ -3941,7 +3941,7 @@ static int test_big_btf_info(unsigned int test_num)
        btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
                              btf_log_buf, BTF_LOG_BUF_SIZE,
                              always_log);
-       if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -3987,7 +3987,7 @@ done:
        free(raw_btf);
        free(user_btf);
 
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
 
        return err;
@@ -4029,7 +4029,7 @@ static int test_btf_id(unsigned int test_num)
        btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
                                 btf_log_buf, BTF_LOG_BUF_SIZE,
                                 always_log);
-       if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4043,7 +4043,7 @@ static int test_btf_id(unsigned int test_num)
        }
 
        btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
-       if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4071,7 +4071,7 @@ static int test_btf_id(unsigned int test_num)
        create_attr.btf_value_type_id = 2;
 
        map_fd = bpf_create_map_xattr(&create_attr);
-       if (CHECK(map_fd == -1, "errno:%d", errno)) {
+       if (CHECK(map_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4094,7 +4094,7 @@ static int test_btf_id(unsigned int test_num)
 
        /* Test BTF ID is removed from the kernel */
        btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
-       if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4105,7 +4105,7 @@ static int test_btf_id(unsigned int test_num)
        close(map_fd);
        map_fd = -1;
        btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
-       if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+       if (CHECK(btf_fd[0] >= 0, "BTF lingers")) {
                err = -1;
                goto done;
        }
@@ -4117,11 +4117,11 @@ done:
                fprintf(stderr, "\n%s", btf_log_buf);
 
        free(raw_btf);
-       if (map_fd != -1)
+       if (map_fd >= 0)
                close(map_fd);
        for (i = 0; i < 2; i++) {
                free(user_btf[i]);
-               if (btf_fd[i] != -1)
+               if (btf_fd[i] >= 0)
                        close(btf_fd[i]);
        }
 
@@ -4166,7 +4166,7 @@ static void do_test_get_info(unsigned int test_num)
        btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
                              btf_log_buf, BTF_LOG_BUF_SIZE,
                              always_log);
-       if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4212,7 +4212,7 @@ done:
        free(raw_btf);
        free(user_btf);
 
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
 }
 
@@ -4249,8 +4249,9 @@ static void do_test_file(unsigned int test_num)
                return;
 
        btf = btf__parse_elf(test->file, &btf_ext);
-       if (IS_ERR(btf)) {
-               if (PTR_ERR(btf) == -ENOENT) {
+       err = libbpf_get_error(btf);
+       if (err) {
+               if (err == -ENOENT) {
                        printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
                        test__skip();
                        return;
@@ -4263,7 +4264,8 @@ static void do_test_file(unsigned int test_num)
        btf_ext__free(btf_ext);
 
        obj = bpf_object__open(test->file);
-       if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+       err = libbpf_get_error(obj);
+       if (CHECK(err, "obj: %d", err))
                return;
 
        prog = bpf_program__next(NULL, obj);
@@ -4298,7 +4300,7 @@ static void do_test_file(unsigned int test_num)
        info_len = sizeof(struct bpf_prog_info);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
 
-       if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+       if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
                fprintf(stderr, "%s\n", btf_log_buf);
                err = -1;
                goto done;
@@ -4330,7 +4332,7 @@ static void do_test_file(unsigned int test_num)
 
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
 
-       if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+       if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
                fprintf(stderr, "%s\n", btf_log_buf);
                err = -1;
                goto done;
@@ -4886,7 +4888,7 @@ static void do_test_pprint(int test_num)
                              always_log);
        free(raw_btf);
 
-       if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+       if (CHECK(btf_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4901,7 +4903,7 @@ static void do_test_pprint(int test_num)
        create_attr.btf_value_type_id = test->value_type_id;
 
        map_fd = bpf_create_map_xattr(&create_attr);
-       if (CHECK(map_fd == -1, "errno:%d", errno)) {
+       if (CHECK(map_fd < 0, "errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -4982,7 +4984,7 @@ static void do_test_pprint(int test_num)
 
                                        err = check_line(expected_line, nexpected_line,
                                                         sizeof(expected_line), line);
-                                       if (err == -1)
+                                       if (err < 0)
                                                goto done;
                                }
 
@@ -4998,7 +5000,7 @@ static void do_test_pprint(int test_num)
                                                                  cpu, cmapv);
                        err = check_line(expected_line, nexpected_line,
                                         sizeof(expected_line), line);
-                       if (err == -1)
+                       if (err < 0)
                                goto done;
 
                        cmapv = cmapv + rounded_value_size;
@@ -5036,9 +5038,9 @@ done:
                fprintf(stderr, "OK");
        if (*btf_log_buf && (err || always_log))
                fprintf(stderr, "\n%s", btf_log_buf);
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
-       if (map_fd != -1)
+       if (map_fd >= 0)
                close(map_fd);
        if (pin_file)
                fclose(pin_file);
@@ -5950,7 +5952,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
        /* get necessary lens */
        info_len = sizeof(struct bpf_prog_info);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-       if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+       if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
                fprintf(stderr, "%s\n", btf_log_buf);
                return -1;
        }
@@ -5980,7 +5982,7 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
        info.func_info_rec_size = rec_size;
        info.func_info = ptr_to_u64(func_info);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-       if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+       if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
                fprintf(stderr, "%s\n", btf_log_buf);
                err = -1;
                goto done;
@@ -6044,7 +6046,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
 
        info_len = sizeof(struct bpf_prog_info);
        err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
-       if (CHECK(err == -1, "err:%d errno:%d", err, errno)) {
+       if (CHECK(err < 0, "err:%d errno:%d", err, errno)) {
                err = -1;
                goto done;
        }
@@ -6123,7 +6125,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
         * Only recheck the info.*line_info* fields.
         * Other fields are not the concern of this test.
         */
-       if (CHECK(err == -1 ||
+       if (CHECK(err < 0 ||
                  info.nr_line_info != cnt ||
                  (jited_cnt && !info.jited_line_info) ||
                  info.nr_jited_line_info != jited_cnt ||
@@ -6260,7 +6262,7 @@ static void do_test_info_raw(unsigned int test_num)
                              always_log);
        free(raw_btf);
 
-       if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
+       if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) {
                err = -1;
                goto done;
        }
@@ -6273,7 +6275,8 @@ static void do_test_info_raw(unsigned int test_num)
        patched_linfo = patch_name_tbd(test->line_info,
                                       test->str_sec, linfo_str_off,
                                       test->str_sec_size, &linfo_size);
-       if (IS_ERR(patched_linfo)) {
+       err = libbpf_get_error(patched_linfo);
+       if (err) {
                fprintf(stderr, "error in creating raw bpf_line_info");
                err = -1;
                goto done;
@@ -6297,7 +6300,7 @@ static void do_test_info_raw(unsigned int test_num)
        }
 
        prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
-       err = ((prog_fd == -1) != test->expected_prog_load_failure);
+       err = ((prog_fd < 0) != test->expected_prog_load_failure);
        if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d",
                  prog_fd, test->expected_prog_load_failure, errno) ||
            CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -6306,7 +6309,7 @@ static void do_test_info_raw(unsigned int test_num)
                goto done;
        }
 
-       if (prog_fd == -1)
+       if (prog_fd < 0)
                goto done;
 
        err = test_get_finfo(test, prog_fd);
@@ -6323,12 +6326,12 @@ done:
        if (*btf_log_buf && (err || always_log))
                fprintf(stderr, "\n%s", btf_log_buf);
 
-       if (btf_fd != -1)
+       if (btf_fd >= 0)
                close(btf_fd);
-       if (prog_fd != -1)
+       if (prog_fd >= 0)
                close(prog_fd);
 
-       if (!IS_ERR(patched_linfo))
+       if (!libbpf_get_error(patched_linfo))
                free(patched_linfo);
 }
 
@@ -6839,9 +6842,9 @@ static void do_test_dedup(unsigned int test_num)
                return;
 
        test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+       err = libbpf_get_error(test_btf);
        free(raw_btf);
-       if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
-                 PTR_ERR(test_btf))) {
+       if (CHECK(err, "invalid test_btf errno:%d", err)) {
                err = -1;
                goto done;
        }
@@ -6853,9 +6856,9 @@ static void do_test_dedup(unsigned int test_num)
        if (!raw_btf)
                return;
        expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+       err = libbpf_get_error(expect_btf);
        free(raw_btf);
-       if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
-                 PTR_ERR(expect_btf))) {
+       if (CHECK(err, "invalid expect_btf errno:%d", err)) {
                err = -1;
                goto done;
        }
@@ -6966,10 +6969,8 @@ static void do_test_dedup(unsigned int test_num)
        }
 
 done:
-       if (!IS_ERR(test_btf))
-               btf__free(test_btf);
-       if (!IS_ERR(expect_btf))
-               btf__free(expect_btf);
+       btf__free(test_btf);
+       btf__free(expect_btf);
 }
 
 void test_btf(void)
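
The ``== -1`` to ``< 0`` conversions throughout this file are not cosmetic:
with LIBBPF_STRICT_DIRECT_ERRS, low-level wrappers may return the negative
Exx code itself rather than -1, so only ``fd < 0`` is correct in both modes.
A minimal sketch (raw_btf/raw_btf_size are assumed to hold a valid raw BTF
blob):

    #include <errno.h>
    #include <bpf/bpf.h>

    static int load_btf_fd(void *raw_btf, unsigned int raw_btf_size)
    {
            char log_buf[4096] = {};
            int fd;

            fd = bpf_load_btf(raw_btf, raw_btf_size, log_buf,
                              sizeof(log_buf), false);
            /* legacy: -1 with errno set; strict: -Exx directly.
             * fd < 0 covers both; fd == -1 would miss strict-mode errors.
             */
            if (fd < 0)
                    return -errno;
            return fd;
    }
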
index 5e129dc..1b90e68 100644 (file)
@@ -32,8 +32,9 @@ static int btf_dump_all_types(const struct btf *btf,
        int err = 0, id;
 
        d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
-       if (IS_ERR(d))
-               return PTR_ERR(d);
+       err = libbpf_get_error(d);
+       if (err)
+               return err;
 
        for (id = 1; id <= type_cnt; id++) {
                err = btf_dump__dump_type(d, id);
@@ -56,8 +57,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
        snprintf(test_file, sizeof(test_file), "%s.o", t->file);
 
        btf = btf__parse_elf(test_file, NULL);
-       if (CHECK(IS_ERR(btf), "btf_parse_elf",
-           "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+       if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) {
                err = -PTR_ERR(btf);
                btf = NULL;
                goto done;
index f36da15..022c7d8 100644 (file)
@@ -4,8 +4,6 @@
 #include <bpf/btf.h>
 #include "btf_helpers.h"
 
-static int duration = 0;
-
 void test_btf_write() {
        const struct btf_var_secinfo *vi;
        const struct btf_type *t;
@@ -16,7 +14,7 @@ void test_btf_write() {
        int id, err, str_off;
 
        btf = btf__new_empty();
-       if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+       if (!ASSERT_OK_PTR(btf, "new_empty"))
                return;
 
        str_off = btf__find_str(btf, "int");
index 643dfa3..876be0e 100644 (file)
@@ -102,8 +102,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
         */
        parent_link = bpf_program__attach_cgroup(obj->progs.egress,
                                                 parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_link), "parent-cg-attach",
-                 "err %ld", PTR_ERR(parent_link)))
+       if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -126,8 +125,7 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
         */
        child_link = bpf_program__attach_cgroup(obj->progs.egress,
                                                child_cgroup_fd);
-       if (CHECK(IS_ERR(child_link), "child-cg-attach",
-                 "err %ld", PTR_ERR(child_link)))
+       if (!ASSERT_OK_PTR(child_link, "child-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -147,10 +145,8 @@ static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
                goto close_bpf_object;
 
 close_bpf_object:
-       if (!IS_ERR(parent_link))
-               bpf_link__destroy(parent_link);
-       if (!IS_ERR(child_link))
-               bpf_link__destroy(child_link);
+       bpf_link__destroy(parent_link);
+       bpf_link__destroy(child_link);
 
        cg_storage_multi_egress_only__destroy(obj);
 }
@@ -176,18 +172,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
         */
        parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
-                 "err %ld", PTR_ERR(parent_egress1_link)))
+       if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
                goto close_bpf_object;
        parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
-                 "err %ld", PTR_ERR(parent_egress2_link)))
+       if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
                goto close_bpf_object;
        parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
-                 "err %ld", PTR_ERR(parent_ingress_link)))
+       if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -221,18 +214,15 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
         */
        child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
-                 "err %ld", PTR_ERR(child_egress1_link)))
+       if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
                goto close_bpf_object;
        child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
-                 "err %ld", PTR_ERR(child_egress2_link)))
+       if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
                goto close_bpf_object;
        child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
-                 "err %ld", PTR_ERR(child_ingress_link)))
+       if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -264,18 +254,12 @@ static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
                goto close_bpf_object;
 
 close_bpf_object:
-       if (!IS_ERR(parent_egress1_link))
-               bpf_link__destroy(parent_egress1_link);
-       if (!IS_ERR(parent_egress2_link))
-               bpf_link__destroy(parent_egress2_link);
-       if (!IS_ERR(parent_ingress_link))
-               bpf_link__destroy(parent_ingress_link);
-       if (!IS_ERR(child_egress1_link))
-               bpf_link__destroy(child_egress1_link);
-       if (!IS_ERR(child_egress2_link))
-               bpf_link__destroy(child_egress2_link);
-       if (!IS_ERR(child_ingress_link))
-               bpf_link__destroy(child_ingress_link);
+       bpf_link__destroy(parent_egress1_link);
+       bpf_link__destroy(parent_egress2_link);
+       bpf_link__destroy(parent_ingress_link);
+       bpf_link__destroy(child_egress1_link);
+       bpf_link__destroy(child_egress2_link);
+       bpf_link__destroy(child_ingress_link);
 
        cg_storage_multi_isolated__destroy(obj);
 }
@@ -301,18 +285,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
         */
        parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_egress1_link), "parent-egress1-cg-attach",
-                 "err %ld", PTR_ERR(parent_egress1_link)))
+       if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
                goto close_bpf_object;
        parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_egress2_link), "parent-egress2-cg-attach",
-                 "err %ld", PTR_ERR(parent_egress2_link)))
+       if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
                goto close_bpf_object;
        parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
                                                         parent_cgroup_fd);
-       if (CHECK(IS_ERR(parent_ingress_link), "parent-ingress-cg-attach",
-                 "err %ld", PTR_ERR(parent_ingress_link)))
+       if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "first-connect-send", "errno %d", errno))
@@ -338,18 +319,15 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
         */
        child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_egress1_link), "child-egress1-cg-attach",
-                 "err %ld", PTR_ERR(child_egress1_link)))
+       if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
                goto close_bpf_object;
        child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_egress2_link), "child-egress2-cg-attach",
-                 "err %ld", PTR_ERR(child_egress2_link)))
+       if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
                goto close_bpf_object;
        child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
                                                        child_cgroup_fd);
-       if (CHECK(IS_ERR(child_ingress_link), "child-ingress-cg-attach",
-                 "err %ld", PTR_ERR(child_ingress_link)))
+       if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
                goto close_bpf_object;
        err = connect_send(CHILD_CGROUP);
        if (CHECK(err, "second-connect-send", "errno %d", errno))
@@ -375,18 +353,12 @@ static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
                goto close_bpf_object;
 
 close_bpf_object:
-       if (!IS_ERR(parent_egress1_link))
-               bpf_link__destroy(parent_egress1_link);
-       if (!IS_ERR(parent_egress2_link))
-               bpf_link__destroy(parent_egress2_link);
-       if (!IS_ERR(parent_ingress_link))
-               bpf_link__destroy(parent_ingress_link);
-       if (!IS_ERR(child_egress1_link))
-               bpf_link__destroy(child_egress1_link);
-       if (!IS_ERR(child_egress2_link))
-               bpf_link__destroy(child_egress2_link);
-       if (!IS_ERR(child_ingress_link))
-               bpf_link__destroy(child_ingress_link);
+       bpf_link__destroy(parent_egress1_link);
+       bpf_link__destroy(parent_egress2_link);
+       bpf_link__destroy(parent_ingress_link);
+       bpf_link__destroy(child_egress1_link);
+       bpf_link__destroy(child_egress2_link);
+       bpf_link__destroy(child_ingress_link);
 
        cg_storage_multi_shared__destroy(obj);
 }
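
Dropping the IS_ERR() guards on the cleanup paths above is safe because
libbpf destructors such as bpf_link__destroy() and bpf_object__close() now
accept NULL (and, for legacy callers, ERR_PTR-encoded) pointers as no-ops,
mirroring free(). The cleanup idiom reduces to:

    struct bpf_link *link = bpf_program__attach_cgroup(prog, cgroup_fd);

    /* ... error handling and test body ... */

    /* no guard needed: destroying a NULL/ERR_PTR link is a no-op */
    bpf_link__destroy(link);
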
index 0a1fc98..20bb883 100644 (file)
@@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void)
        prog_cnt = 2;
        CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
                                  BPF_F_QUERY_EFFECTIVE, &attach_flags,
-                                 prog_ids, &prog_cnt) != -1);
+                                 prog_ids, &prog_cnt) >= 0);
        CHECK_FAIL(errno != ENOSPC);
        CHECK_FAIL(prog_cnt != 4);
        /* check that prog_ids are returned even when buffer is too small */
index 736796e..9091524 100644 (file)
@@ -65,8 +65,7 @@ void test_cgroup_link(void)
        for (i = 0; i < cg_nr; i++) {
                links[i] = bpf_program__attach_cgroup(skel->progs.egress,
                                                      cgs[i].fd);
-               if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
-                                i, PTR_ERR(links[i])))
+               if (!ASSERT_OK_PTR(links[i], "cg_attach"))
                        goto cleanup;
        }
 
@@ -121,8 +120,7 @@ void test_cgroup_link(void)
 
        links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
                                                    cgs[last_cg].fd);
-       if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
-                 PTR_ERR(links[last_cg])))
+       if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
                goto cleanup;
 
        ping_and_check(cg_nr + 1, 0);
@@ -147,7 +145,7 @@ void test_cgroup_link(void)
        /* attempt to mix in with multi-attach bpf_link */
        tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
                                              cgs[last_cg].fd);
-       if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+       if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) {
                bpf_link__destroy(tmp_link);
                goto cleanup;
        }
@@ -165,8 +163,7 @@ void test_cgroup_link(void)
        /* attach back link-based one */
        links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
                                                    cgs[last_cg].fd);
-       if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
-                 PTR_ERR(links[last_cg])))
+       if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
                goto cleanup;
 
        ping_and_check(cg_nr, 0);
@@ -249,8 +246,7 @@ cleanup:
                                 BPF_CGROUP_INET_EGRESS);
 
        for (i = 0; i < cg_nr; i++) {
-               if (!IS_ERR(links[i]))
-                       bpf_link__destroy(links[i]);
+               bpf_link__destroy(links[i]);
        }
        test_cgroup_link__destroy(skel);
 
index 464edc1..b9dc4ec 100644 (file)
@@ -60,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
                goto cleanup;
 
        link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
-       if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "cgroup_attach"))
                goto cleanup;
 
        run_lookup_test(&skel->bss->g_serv_port, out_sk);
index b62a393..012068f 100644 (file)
@@ -53,7 +53,7 @@ static void test_check_mtu_xdp_attach(void)
        prog = skel->progs.xdp_use_helper_basic;
 
        link = bpf_program__attach_xdp(prog, IFINDEX_LO);
-       if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_attach"))
                goto out;
        skel->links.xdp_use_helper_basic = link;
 
index 6077108..d02e064 100644 (file)
@@ -369,8 +369,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
        const char *name;
        int i;
 
-       if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
-           CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+       if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) {
                btf__free(local_btf);
                btf__free(targ_btf);
                return -EINVAL;
@@ -848,8 +847,7 @@ void test_core_reloc(void)
                }
 
                obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
-               if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
-                         test_case->bpf_obj_file, PTR_ERR(obj)))
+               if (!ASSERT_OK_PTR(obj, "obj_open"))
                        continue;
 
                probe_name = "raw_tracepoint/sys_enter";
@@ -899,8 +897,7 @@ void test_core_reloc(void)
                data->my_pid_tgid = my_pid_tgid;
 
                link = bpf_program__attach_raw_tracepoint(prog, tp_name);
-               if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
-                         PTR_ERR(link)))
+               if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                        goto cleanup;
 
                /* trigger test run */
@@ -941,10 +938,8 @@ cleanup:
                        CHECK_FAIL(munmap(mmap_data, mmap_sz));
                        mmap_data = NULL;
                }
-               if (!IS_ERR_OR_NULL(link)) {
-                       bpf_link__destroy(link);
-                       link = NULL;
-               }
+               bpf_link__destroy(link);
+               link = NULL;
                bpf_object__close(obj);
        }
 }
index 6399084..73b4c76 100644 (file)
@@ -146,10 +146,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 
 close_prog:
        for (i = 0; i < prog_cnt; i++)
-               if (!IS_ERR_OR_NULL(link[i]))
-                       bpf_link__destroy(link[i]);
-       if (!IS_ERR_OR_NULL(obj))
-               bpf_object__close(obj);
+               bpf_link__destroy(link[i]);
+       bpf_object__close(obj);
        bpf_object__close(tgt_obj);
        free(link);
        free(prog);
@@ -231,7 +229,7 @@ static int test_second_attach(struct bpf_object *obj)
                return err;
 
        link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
-       if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+       if (!ASSERT_OK_PTR(link, "second_link"))
                goto out;
 
        err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
@@ -283,9 +281,7 @@ static void test_fmod_ret_freplace(void)
        opts.attach_prog_fd = pkt_fd;
 
        freplace_obj = bpf_object__open_file(freplace_name, &opts);
-       if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
-                 "failed to open %s: %ld\n", freplace_name,
-                 PTR_ERR(freplace_obj)))
+       if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
                goto out;
 
        err = bpf_object__load(freplace_obj);
@@ -294,14 +290,12 @@ static void test_fmod_ret_freplace(void)
 
        prog = bpf_program__next(NULL, freplace_obj);
        freplace_link = bpf_program__attach_trace(prog);
-       if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+       if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
                goto out;
 
        opts.attach_prog_fd = bpf_program__fd(prog);
        fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
-       if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
-                 "failed to open %s: %ld\n", fmod_ret_name,
-                 PTR_ERR(fmod_obj)))
+       if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
                goto out;
 
        err = bpf_object__load(fmod_obj);
@@ -350,9 +344,7 @@ static void test_obj_load_failure_common(const char *obj_file,
                           );
 
        obj = bpf_object__open_file(obj_file, &opts);
-       if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
-                 "failed to open %s: %ld\n", obj_file,
-                 PTR_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open"))
                goto close_prog;
 
        /* It should fail to load the program */
@@ -361,8 +353,7 @@ static void test_obj_load_failure_common(const char *obj_file,
                goto close_prog;
 
 close_prog:
-       if (!IS_ERR_OR_NULL(obj))
-               bpf_object__close(obj);
+       bpf_object__close(obj);
        bpf_object__close(pkt_obj);
 }
 
index cd6dc80..225714f 100644 (file)
@@ -541,7 +541,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
                return;
 
        link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
-       if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_netns"))
                goto out_close;
 
        run_tests_skb_less(tap_fd, skel->maps.last_dissection);
index 172c586..3931ede 100644 (file)
@@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2)
        /* Expect failure creating link when another link exists */
        errno = 0;
        link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
-       if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+       if (CHECK_FAIL(link2 >= 0 || errno != E2BIG))
                perror("bpf_prog_attach(prog2) expected E2BIG");
-       if (link2 != -1)
+       if (link2 >= 0)
                close(link2);
        CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
 
@@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2)
        /* Expect failure creating link when prog attached */
        errno = 0;
        link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
-       if (CHECK_FAIL(link != -1 || errno != EEXIST))
+       if (CHECK_FAIL(link >= 0 || errno != EEXIST))
                perror("bpf_link_create(prog2) expected EEXIST");
-       if (link != -1)
+       if (link >= 0)
                close(link);
        CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
 
@@ -623,7 +623,7 @@ static void run_tests(int netns)
        }
 out_close:
        for (i = 0; i < ARRAY_SIZE(progs); i++) {
-               if (progs[i] != -1)
+               if (progs[i] >= 0)
                        CHECK_FAIL(close(progs[i]));
        }
 }
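Alongside the pointer checks, raw fd tests move from comparisons against -1 to sign checks: once wrappers like bpf_link_create() may return any negative errno-style value rather than exactly -1, testing for a negative result is the robust failure check. A short sketch under that assumption (try_flow_link() is illustrative only):

    #include <unistd.h>
    #include <bpf/bpf.h>

    static int try_flow_link(int prog_fd, int netns_fd,
                             const struct bpf_link_create_opts *opts)
    {
            int link_fd = bpf_link_create(prog_fd, netns_fd,
                                          BPF_FLOW_DISSECTOR, opts);

            if (link_fd < 0)        /* catches -1 and -EINVAL/-E2BIG alike */
                    return link_fd;

            close(link_fd);
            return 0;
    }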
index 9257222..522237a 100644 (file)
@@ -121,12 +121,12 @@ void test_get_stack_raw_tp(void)
                goto close_prog;
 
        link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
-       if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                goto close_prog;
 
        pb_opts.sample_cb = get_stack_print_output;
        pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
-       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+       if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto close_prog;
 
        /* trigger some syscall action */
@@ -141,9 +141,7 @@ void test_get_stack_raw_tp(void)
        }
 
 close_prog:
-       if (!IS_ERR_OR_NULL(link))
-               bpf_link__destroy(link);
-       if (!IS_ERR_OR_NULL(pb))
-               perf_buffer__free(pb);
+       bpf_link__destroy(link);
+       perf_buffer__free(pb);
        bpf_object__close(obj);
 }
index d884b2e..8d5a602 100644 (file)
@@ -48,8 +48,7 @@ void test_get_stackid_cannot_attach(void)
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_no_callchain",
-             "should have failed\n");
+       ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain");
        close(pmu_fd);
 
        /* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
@@ -65,8 +64,7 @@ void test_get_stackid_cannot_attach(void)
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event_callchain",
-             "err: %ld\n", PTR_ERR(skel->links.oncpu));
+       ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
        close(pmu_fd);
 
        /* add exclude_callchain_kernel, attach should fail */
@@ -82,8 +80,7 @@ void test_get_stackid_cannot_attach(void)
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       CHECK(!IS_ERR(skel->links.oncpu), "attach_perf_event_exclude_callchain_kernel",
-             "should have failed\n");
+       ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel");
        close(pmu_fd);
 
 cleanup:
index 428d488..4747ab1 100644 (file)
@@ -48,8 +48,7 @@ static void test_hashmap_generic(void)
        struct hashmap *map;
 
        map = hashmap__new(hash_fn, equal_fn, NULL);
-       if (CHECK(IS_ERR(map), "hashmap__new",
-                 "failed to create map: %ld\n", PTR_ERR(map)))
+       if (!ASSERT_OK_PTR(map, "hashmap__new"))
                return;
 
        for (i = 0; i < ELEM_CNT; i++) {
@@ -267,8 +266,7 @@ static void test_hashmap_multimap(void)
 
        /* force collisions */
        map = hashmap__new(collision_hash_fn, equal_fn, NULL);
-       if (CHECK(IS_ERR(map), "hashmap__new",
-                 "failed to create map: %ld\n", PTR_ERR(map)))
+       if (!ASSERT_OK_PTR(map, "hashmap__new"))
                return;
 
        /* set up multimap:
@@ -339,8 +337,7 @@ static void test_hashmap_empty()
 
        /* force collisions */
        map = hashmap__new(hash_fn, equal_fn, NULL);
-       if (CHECK(IS_ERR(map), "hashmap__new",
-                 "failed to create map: %ld\n", PTR_ERR(map)))
+       if (!ASSERT_OK_PTR(map, "hashmap__new"))
                goto cleanup;
 
        if (CHECK(hashmap__size(map) != 0, "hashmap__size",
index d651079..ddfb6bf 100644 (file)
@@ -97,15 +97,13 @@ void test_kfree_skb(void)
                goto close_prog;
 
        link = bpf_program__attach_raw_tracepoint(prog, NULL);
-       if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                goto close_prog;
        link_fentry = bpf_program__attach_trace(fentry);
-       if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
-                 PTR_ERR(link_fentry)))
+       if (!ASSERT_OK_PTR(link_fentry, "attach fentry"))
                goto close_prog;
        link_fexit = bpf_program__attach_trace(fexit);
-       if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
-                 PTR_ERR(link_fexit)))
+       if (!ASSERT_OK_PTR(link_fexit, "attach fexit"))
                goto close_prog;
 
        perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
@@ -116,7 +114,7 @@ void test_kfree_skb(void)
        pb_opts.sample_cb = on_sample;
        pb_opts.ctx = &passed;
        pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
-       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+       if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto close_prog;
 
        memcpy(skb.cb, &cb, sizeof(cb));
@@ -144,12 +142,9 @@ void test_kfree_skb(void)
        CHECK_FAIL(!test_ok[0] || !test_ok[1]);
 close_prog:
        perf_buffer__free(pb);
-       if (!IS_ERR_OR_NULL(link))
-               bpf_link__destroy(link);
-       if (!IS_ERR_OR_NULL(link_fentry))
-               bpf_link__destroy(link_fentry);
-       if (!IS_ERR_OR_NULL(link_fexit))
-               bpf_link__destroy(link_fexit);
+       bpf_link__destroy(link);
+       bpf_link__destroy(link_fentry);
+       bpf_link__destroy(link_fexit);
        bpf_object__close(obj);
        bpf_object__close(obj2);
 }
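perf_buffer__new() follows the same scheme in these hunks: validate the returned pointer once, then free unconditionally, on the assumption (implied by the guard removal above) that perf_buffer__free() accepts NULL/error pointers. A hedged sketch, with consume_once() and the page count invented for illustration:

    #include <bpf/libbpf.h>

    static void on_sample(void *ctx, int cpu, void *data, __u32 size)
    {
            /* handle one record */
    }

    static int consume_once(int perf_map_fd)
    {
            struct perf_buffer_opts pb_opts = { .sample_cb = on_sample };
            struct perf_buffer *pb;
            int err = 0;

            pb = perf_buffer__new(perf_map_fd, 8 /* pages per CPU */, &pb_opts);
            if (libbpf_get_error(pb)) {
                    err = -1;
                    goto out;
            }

            err = perf_buffer__poll(pb, 100 /* timeout, ms */);
    out:
            perf_buffer__free(pb);  /* assumed safe on NULL/error pointers */
            return err < 0 ? -1 : 0;
    }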
index b58b775..67bebd3 100644 (file)
@@ -87,8 +87,7 @@ void test_ksyms_btf(void)
        struct btf *btf;
 
        btf = libbpf_find_kernel_btf();
-       if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
-                 PTR_ERR(btf)))
+       if (!ASSERT_OK_PTR(btf, "btf_exists"))
                return;
 
        percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
index a743288..6fc97c4 100644 (file)
@@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
        int err, i;
 
        link = bpf_program__attach(prog);
-       if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_attach"))
                goto cleanup;
 
        bss->in = 1;
@@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
 
        /* re-open link from BPFFS */
        link = bpf_link__open(link_pin_path);
-       if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_open"))
                goto cleanup;
 
        CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
@@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
        CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
 
 cleanup:
-       if (!IS_ERR(link))
-               bpf_link__destroy(link);
+       bpf_link__destroy(link);
 }
 
 void test_link_pinning(void)
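For context, the pin/re-open round trip this test drives looks roughly as follows; the BPFFS path is made up and error handling is reduced to libbpf_get_error() checks:

    #include <bpf/libbpf.h>

    static int pin_cycle(struct bpf_program *prog)
    {
            const char *path = "/sys/fs/bpf/example_link";  /* illustrative */
            struct bpf_link *link;
            int err;

            link = bpf_program__attach(prog);
            if (libbpf_get_error(link))
                    return -1;

            err = bpf_link__pin(link, path);        /* persist in BPFFS */
            bpf_link__destroy(link);                /* the pin keeps it alive */
            if (err)
                    return err;

            link = bpf_link__open(path);            /* reacquire from BPFFS */
            if (libbpf_get_error(link))
                    return -1;

            bpf_link__unpin(link);
            bpf_link__destroy(link);
            return 0;
    }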
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644 (file)
index 0000000..beebfa9
--- /dev/null
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+       __u64 key, value = START_VALUE;
+       int err;
+
+       for (key = 1; key < MAX_ENTRIES + 1; key++) {
+               err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+               if (!ASSERT_OK(err, "bpf_map_update_elem"))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+       __u64 key, value[nr_cpus];
+       int i, err;
+
+       for (i = 0; i < nr_cpus; i++)
+               value[i] = START_VALUE;
+
+       for (key = 1; key < MAX_ENTRIES + 1; key++) {
+               err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+               if (!ASSERT_OK(err, "bpf_map_update_elem"))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type,
+                                                int *map_fd)
+{
+       struct test_lookup_and_delete *skel;
+       int err;
+
+       skel = test_lookup_and_delete__open();
+       if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open"))
+               return NULL;
+
+       err = bpf_map__set_type(skel->maps.hash_map, map_type);
+       if (!ASSERT_OK(err, "bpf_map__set_type"))
+               goto cleanup;
+
+       err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES);
+       if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+               goto cleanup;
+
+       err = test_lookup_and_delete__load(skel);
+       if (!ASSERT_OK(err, "test_lookup_and_delete__load"))
+               goto cleanup;
+
+       *map_fd = bpf_map__fd(skel->maps.hash_map);
+       if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd"))
+               goto cleanup;
+
+       return skel;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+       return NULL;
+}
+
+/* Triggers BPF program that updates map with given key and value */
+static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key,
+                     __u64 value)
+{
+       int err;
+
+       skel->bss->set_pid = getpid();
+       skel->bss->set_key = key;
+       skel->bss->set_value = value;
+
+       err = test_lookup_and_delete__attach(skel);
+       if (!ASSERT_OK(err, "test_lookup_and_delete__attach"))
+               return -1;
+
+       syscall(__NR_getpgid);
+
+       test_lookup_and_delete__detach(skel);
+
+       return 0;
+}
+
+static void test_lookup_and_delete_hash(void)
+{
+       struct test_lookup_and_delete *skel;
+       __u64 key, value;
+       int map_fd, err;
+
+       /* Setup program and fill the map. */
+       skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd);
+       if (!ASSERT_OK_PTR(skel, "setup_prog"))
+               return;
+
+       err = fill_values(map_fd);
+       if (!ASSERT_OK(err, "fill_values"))
+               goto cleanup;
+
+       /* Lookup and delete element. */
+       key = 1;
+       err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+               goto cleanup;
+
+       /* Fetched value should match the initially set value. */
+       if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem",
+                 "unexpected value=%lld\n", value))
+               goto cleanup;
+
+       /* Check that the entry is nonexistent. */
+       err = bpf_map_lookup_elem(map_fd, &key, &value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_percpu_hash(void)
+{
+       struct test_lookup_and_delete *skel;
+       __u64 key, val, value[nr_cpus];
+       int map_fd, err, i;
+
+       /* Setup program and fill the map. */
+       skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd);
+       if (!ASSERT_OK_PTR(skel, "setup_prog"))
+               return;
+
+       err = fill_values_percpu(map_fd);
+       if (!ASSERT_OK(err, "fill_values_percpu"))
+               goto cleanup;
+
+       /* Lookup and delete element. */
+       key = 1;
+       err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+               goto cleanup;
+
+       for (i = 0; i < nr_cpus; i++) {
+               val = value[i];
+
+               /* Fetched value should match the initially set value. */
+               if (CHECK(val != START_VALUE, "map value",
+                         "unexpected for cpu %d: %lld\n", i, val))
+                       goto cleanup;
+       }
+
+       /* Check that the entry is nonexistent. */
+       err = bpf_map_lookup_elem(map_fd, &key, value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_hash(void)
+{
+       struct test_lookup_and_delete *skel;
+       __u64 key, value;
+       int map_fd, err;
+
+       /* Setup program and fill the LRU map. */
+       skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd);
+       if (!ASSERT_OK_PTR(skel, "setup_prog"))
+               return;
+
+       err = fill_values(map_fd);
+       if (!ASSERT_OK(err, "fill_values"))
+               goto cleanup;
+
+       /* Insert new element at key=3, should reuse LRU element. */
+       key = 3;
+       err = trigger_tp(skel, key, NEW_VALUE);
+       if (!ASSERT_OK(err, "trigger_tp"))
+               goto cleanup;
+
+       /* Lookup and delete element 3. */
+       err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+               goto cleanup;
+
+       /* Value should match the new value. */
+       if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+                 "unexpected value=%lld\n", value))
+               goto cleanup;
+
+       /* Check that entries 3 and 1 are nonexistent. */
+       err = bpf_map_lookup_elem(map_fd, &key, &value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+       key = 1;
+       err = bpf_map_lookup_elem(map_fd, &key, &value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_percpu_hash(void)
+{
+       struct test_lookup_and_delete *skel;
+       __u64 key, val, value[nr_cpus];
+       int map_fd, err, i, cpucnt = 0;
+
+       /* Setup program and fill the LRU map. */
+       skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd);
+       if (!ASSERT_OK_PTR(skel, "setup_prog"))
+               return;
+
+       err = fill_values_percpu(map_fd);
+       if (!ASSERT_OK(err, "fill_values_percpu"))
+               goto cleanup;
+
+       /* Insert new element at key=3, should reuse LRU element 1. */
+       key = 3;
+       err = trigger_tp(skel, key, NEW_VALUE);
+       if (!ASSERT_OK(err, "trigger_tp"))
+               goto cleanup;
+
+       /* Clean value. */
+       for (i = 0; i < nr_cpus; i++)
+               value[i] = 0;
+
+       /* Lookup and delete element 3. */
+       err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+       if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+               goto cleanup;
+
+       /* Check if only one CPU has set the value. */
+       for (i = 0; i < nr_cpus; i++) {
+               val = value[i];
+               if (val) {
+                       if (CHECK(val != NEW_VALUE, "map value",
+                                 "unexpected for cpu %d: %lld\n", i, val))
+                               goto cleanup;
+                       cpucnt++;
+               }
+       }
+       if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+                 cpucnt))
+               goto cleanup;
+
+       /* Check that entries 3 and 1 are nonexistent. */
+       err = bpf_map_lookup_elem(map_fd, &key, value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+       key = 1;
+       err = bpf_map_lookup_elem(map_fd, &key, value);
+       if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+               goto cleanup;
+
+cleanup:
+       test_lookup_and_delete__destroy(skel);
+}
+
+void test_lookup_and_delete(void)
+{
+       nr_cpus = bpf_num_possible_cpus();
+
+       if (test__start_subtest("lookup_and_delete"))
+               test_lookup_and_delete_hash();
+       if (test__start_subtest("lookup_and_delete_percpu"))
+               test_lookup_and_delete_percpu_hash();
+       if (test__start_subtest("lookup_and_delete_lru"))
+               test_lookup_and_delete_lru_hash();
+       if (test__start_subtest("lookup_and_delete_lru_percpu"))
+               test_lookup_and_delete_lru_percpu_hash();
+}
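The BPF object behind setup_prog() is not shown in this section. Inferred from the skeleton references above (set_pid/set_key/set_value in bss, the retypable hash_map, the getpgid trigger), it presumably looks something like the sketch below; treat everything beyond those names as assumption:

    // SPDX-License-Identifier: GPL-2.0
    #include "vmlinux.h"
    #include <bpf/bpf_helpers.h>

    __u32 set_pid = 0;
    __u64 set_key = 0;
    __u64 set_value = 0;

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);  /* retyped via bpf_map__set_type() */
            __uint(max_entries, 2);
            __type(key, __u64);
            __type(value, __u64);
    } hash_map SEC(".maps");

    SEC("tp/syscalls/sys_enter_getpgid")
    int bpf_lookup_and_delete_test(const void *ctx)
    {
            if (set_pid == (__u32)(bpf_get_current_pid_tgid() >> 32))
                    bpf_map_update_elem(&hash_map, &set_key, &set_value,
                                        BPF_NOEXIST);
            return 0;
    }

    char _license[] SEC("license") = "GPL";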
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
new file mode 100644 (file)
index 0000000..59adb47
--- /dev/null
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ *   1. call listen() for 4 server sockets.
+ *   2. call connect() for 25 client sockets.
+ *   3. call listen() for 1 server socket. (migration target)
+ *   4. update a map to migrate all child sockets
+ *        to the last server socket (migrate_map[cookie] = 4)
+ *   5. call shutdown() for the first 4 server sockets
+ *        and migrate the requests in the accept queue
+ *        to the last server socket.
+ *   6. call listen() for the second server socket.
+ *   7. call shutdown() for the last server
+ *        and migrate the requests in the accept queue
+ *        to the second server socket.
+ *   8. call listen() for the last server.
+ *   9. call shutdown() for the second server
+ *        and migrate the requests in the accept queue
+ *        to the last server socket.
+ *  10. call accept() for the last server socket.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "test_migrate_reuseport.skel.h"
+#include "network_helpers.h"
+
+#ifndef TCP_FASTOPEN_CONNECT
+#define TCP_FASTOPEN_CONNECT 30
+#endif
+
+#define IFINDEX_LO 1
+
+#define NR_SERVERS 5
+#define NR_CLIENTS (NR_SERVERS * 5)
+#define MIGRATED_TO (NR_SERVERS - 1)
+
+/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
+#define QLEN (NR_CLIENTS * 5)
+
+#define MSG "Hello World\0"
+#define MSGLEN 12
+
+static struct migrate_reuseport_test_case {
+       const char *name;
+       __s64 servers[NR_SERVERS];
+       __s64 clients[NR_CLIENTS];
+       struct sockaddr_storage addr;
+       socklen_t addrlen;
+       int family;
+       int state;
+       bool drop_ack;
+       bool expire_synack_timer;
+       bool fastopen;
+       struct bpf_link *link;
+} test_cases[] = {
+       {
+               .name = "IPv4 TCP_ESTABLISHED  inet_csk_listen_stop",
+               .family = AF_INET,
+               .state = BPF_TCP_ESTABLISHED,
+               .drop_ack = false,
+               .expire_synack_timer = false,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv4 TCP_SYN_RECV     inet_csk_listen_stop",
+               .family = AF_INET,
+               .state = BPF_TCP_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = false,
+               .fastopen = true,
+       },
+       {
+               .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
+               .family = AF_INET,
+               .state = BPF_TCP_NEW_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = true,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+               .family = AF_INET,
+               .state = BPF_TCP_NEW_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = false,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv6 TCP_ESTABLISHED  inet_csk_listen_stop",
+               .family = AF_INET6,
+               .state = BPF_TCP_ESTABLISHED,
+               .drop_ack = false,
+               .expire_synack_timer = false,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv6 TCP_SYN_RECV     inet_csk_listen_stop",
+               .family = AF_INET6,
+               .state = BPF_TCP_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = false,
+               .fastopen = true,
+       },
+       {
+               .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
+               .family = AF_INET6,
+               .state = BPF_TCP_NEW_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = true,
+               .fastopen = false,
+       },
+       {
+               .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+               .family = AF_INET6,
+               .state = BPF_TCP_NEW_SYN_RECV,
+               .drop_ack = true,
+               .expire_synack_timer = false,
+               .fastopen = false,
+       }
+};
+
+static void init_fds(__s64 fds[], int len)
+{
+       int i;
+
+       for (i = 0; i < len; i++)
+               fds[i] = -1;
+}
+
+static void close_fds(__s64 fds[], int len)
+{
+       int i;
+
+       for (i = 0; i < len; i++) {
+               if (fds[i] != -1) {
+                       close(fds[i]);
+                       fds[i] = -1;
+               }
+       }
+}
+
+static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
+{
+       int err = 0, fd, len;
+
+       fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+       if (!ASSERT_NEQ(fd, -1, "open"))
+               return -1;
+
+       if (restore) {
+               len = write(fd, buf, *saved_len);
+               if (!ASSERT_EQ(len, *saved_len, "write - restore"))
+                       err = -1;
+       } else {
+               *saved_len = read(fd, buf, size);
+               if (!ASSERT_GE(*saved_len, 1, "read")) {
+                       err = -1;
+                       goto close;
+               }
+
+               err = lseek(fd, 0, SEEK_SET);
+               if (!ASSERT_OK(err, "lseek"))
+                       goto close;
+
+               /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
+                *  TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
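+                *  i.e. 0x1 | 0x2 | 0x4 | 0x200 = 519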
+                */
+               len = write(fd, "519", 3);
+               if (!ASSERT_EQ(len, 3, "write - setup"))
+                       err = -1;
+       }
+
+close:
+       close(fd);
+
+       return err;
+}
+
+static int drop_ack(struct migrate_reuseport_test_case *test_case,
+                   struct test_migrate_reuseport *skel)
+{
+       if (test_case->family == AF_INET)
+               skel->bss->server_port = ((struct sockaddr_in *)
+                                         &test_case->addr)->sin_port;
+       else
+               skel->bss->server_port = ((struct sockaddr_in6 *)
+                                         &test_case->addr)->sin6_port;
+
+       test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
+                                                 IFINDEX_LO);
+       if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
+               return -1;
+
+       return 0;
+}
+
+static int pass_ack(struct migrate_reuseport_test_case *test_case)
+{
+       int err;
+
+       err = bpf_link__detach(test_case->link);
+       if (!ASSERT_OK(err, "bpf_link__detach"))
+               return -1;
+
+       test_case->link = NULL;
+
+       return 0;
+}
+
+static int start_servers(struct migrate_reuseport_test_case *test_case,
+                        struct test_migrate_reuseport *skel)
+{
+       int i, err, prog_fd, reuseport = 1, qlen = QLEN;
+
+       prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
+
+       make_sockaddr(test_case->family,
+                     test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
+                     &test_case->addr, &test_case->addrlen);
+
+       for (i = 0; i < NR_SERVERS; i++) {
+               test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
+                                              IPPROTO_TCP);
+               if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
+                       return -1;
+
+               err = setsockopt(test_case->servers[i], SOL_SOCKET,
+                                SO_REUSEPORT, &reuseport, sizeof(reuseport));
+               if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
+                       return -1;
+
+               err = bind(test_case->servers[i],
+                          (struct sockaddr *)&test_case->addr,
+                          test_case->addrlen);
+               if (!ASSERT_OK(err, "bind"))
+                       return -1;
+
+               if (i == 0) {
+                       err = setsockopt(test_case->servers[i], SOL_SOCKET,
+                                        SO_ATTACH_REUSEPORT_EBPF,
+                                        &prog_fd, sizeof(prog_fd));
+                       if (!ASSERT_OK(err,
+                                      "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
+                               return -1;
+
+                       err = getsockname(test_case->servers[i],
+                                         (struct sockaddr *)&test_case->addr,
+                                         &test_case->addrlen);
+                       if (!ASSERT_OK(err, "getsockname"))
+                               return -1;
+               }
+
+               if (test_case->fastopen) {
+                       err = setsockopt(test_case->servers[i],
+                                        SOL_TCP, TCP_FASTOPEN,
+                                        &qlen, sizeof(qlen));
+                       if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
+                               return -1;
+               }
+
+               /* All requests will be tied to the first four listeners */
+               if (i != MIGRATED_TO) {
+                       err = listen(test_case->servers[i], qlen);
+                       if (!ASSERT_OK(err, "listen"))
+                               return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int start_clients(struct migrate_reuseport_test_case *test_case)
+{
+       char buf[MSGLEN] = MSG;
+       int i, err;
+
+       for (i = 0; i < NR_CLIENTS; i++) {
+               test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
+                                              IPPROTO_TCP);
+               if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
+                       return -1;
+
+               /* The attached XDP program drops only the final ACK, so
+                * clients will transition to TCP_ESTABLISHED immediately.
+                */
+               err = settimeo(test_case->clients[i], 100);
+               if (!ASSERT_OK(err, "settimeo"))
+                       return -1;
+
+               if (test_case->fastopen) {
+                       int fastopen = 1;
+
+                       err = setsockopt(test_case->clients[i], IPPROTO_TCP,
+                                        TCP_FASTOPEN_CONNECT, &fastopen,
+                                        sizeof(fastopen));
+                       if (!ASSERT_OK(err,
+                                      "setsockopt - TCP_FASTOPEN_CONNECT"))
+                               return -1;
+               }
+
+               err = connect(test_case->clients[i],
+                             (struct sockaddr *)&test_case->addr,
+                             test_case->addrlen);
+               if (!ASSERT_OK(err, "connect"))
+                       return -1;
+
+               err = write(test_case->clients[i], buf, MSGLEN);
+               if (!ASSERT_EQ(err, MSGLEN, "write"))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int update_maps(struct migrate_reuseport_test_case *test_case,
+                      struct test_migrate_reuseport *skel)
+{
+       int i, err, migrated_to = MIGRATED_TO;
+       int reuseport_map_fd, migrate_map_fd;
+       __u64 value;
+
+       reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
+       migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
+
+       for (i = 0; i < NR_SERVERS; i++) {
+               value = (__u64)test_case->servers[i];
+               err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
+                                         BPF_NOEXIST);
+               if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
+                       return -1;
+
+               err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
+               if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
+                       return -1;
+
+               err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
+                                         BPF_NOEXIST);
+               if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int migrate_dance(struct migrate_reuseport_test_case *test_case)
+{
+       int i, err;
+
+       /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
+        * to the last listener based on eBPF.
+        */
+       for (i = 0; i < MIGRATED_TO; i++) {
+               err = shutdown(test_case->servers[i], SHUT_RDWR);
+               if (!ASSERT_OK(err, "shutdown"))
+                       return -1;
+       }
+
+       /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
+       if (test_case->state == BPF_TCP_NEW_SYN_RECV)
+               return 0;
+
+       /* Note that we use the second listener instead of the
+        * first one here.
+        *
+        * The first listener is bind()ed with port 0, and
+        * SOCK_BINDPORT_LOCK is not set in sk_userlocks, so
+        * calling listen() again will bind() the first listener
+        * on a new ephemeral port and detach it from the existing
+        * reuseport group.  (See: __inet_bind(), tcp_set_state())
+        *
+        * OTOH, the second one is bind()ed with a specific port,
+        * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
+        * resurrect the listener on the existing reuseport group.
+        */
+       err = listen(test_case->servers[1], QLEN);
+       if (!ASSERT_OK(err, "listen"))
+               return -1;
+
+       /* Migrate from the last listener to the second one.
+        *
+        * All listeners were detached out of the reuseport_map,
+        * so migration will be done by kernel random pick from here.
+        */
+       err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
+       if (!ASSERT_OK(err, "shutdown"))
+               return -1;
+
+       /* Back to the existing reuseport group */
+       err = listen(test_case->servers[MIGRATED_TO], QLEN);
+       if (!ASSERT_OK(err, "listen"))
+               return -1;
+
+       /* Migrate back to the last one from the second one */
+       err = shutdown(test_case->servers[1], SHUT_RDWR);
+       if (!ASSERT_OK(err, "shutdown"))
+               return -1;
+
+       return 0;
+}
+
+static void count_requests(struct migrate_reuseport_test_case *test_case,
+                          struct test_migrate_reuseport *skel)
+{
+       struct sockaddr_storage addr;
+       socklen_t len = sizeof(addr);
+       int err, cnt = 0, client;
+       char buf[MSGLEN];
+
+       err = settimeo(test_case->servers[MIGRATED_TO], 4000);
+       if (!ASSERT_OK(err, "settimeo"))
+               goto out;
+
+       for (; cnt < NR_CLIENTS; cnt++) {
+               client = accept(test_case->servers[MIGRATED_TO],
+                               (struct sockaddr *)&addr, &len);
+               if (!ASSERT_NEQ(client, -1, "accept"))
+                       goto out;
+
+               memset(buf, 0, MSGLEN);
+               read(client, buf, MSGLEN);
+               close(client);
+
+               if (!ASSERT_STREQ(buf, MSG, "read"))
+                       goto out;
+       }
+
+out:
+       ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
+
+       switch (test_case->state) {
+       case BPF_TCP_ESTABLISHED:
+               cnt = skel->bss->migrated_at_close;
+               break;
+       case BPF_TCP_SYN_RECV:
+               cnt = skel->bss->migrated_at_close_fastopen;
+               break;
+       case BPF_TCP_NEW_SYN_RECV:
+               if (test_case->expire_synack_timer)
+                       cnt = skel->bss->migrated_at_send_synack;
+               else
+                       cnt = skel->bss->migrated_at_recv_ack;
+               break;
+       default:
+               cnt = 0;
+       }
+
+       ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
+}
+
+static void run_test(struct migrate_reuseport_test_case *test_case,
+                    struct test_migrate_reuseport *skel)
+{
+       int err, saved_len;
+       char buf[16];
+
+       skel->bss->migrated_at_close = 0;
+       skel->bss->migrated_at_close_fastopen = 0;
+       skel->bss->migrated_at_send_synack = 0;
+       skel->bss->migrated_at_recv_ack = 0;
+
+       init_fds(test_case->servers, NR_SERVERS);
+       init_fds(test_case->clients, NR_CLIENTS);
+
+       if (test_case->fastopen) {
+               memset(buf, 0, sizeof(buf));
+
+               err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
+               if (!ASSERT_OK(err, "setup_fastopen - setup"))
+                       return;
+       }
+
+       err = start_servers(test_case, skel);
+       if (!ASSERT_OK(err, "start_servers"))
+               goto close_servers;
+
+       if (test_case->drop_ack) {
+               /* Drop the final ACK of the 3-way handshake so that the
+                * in-flight requests stay in TCP_SYN_RECV or TCP_NEW_SYN_RECV.
+                */
+               err = drop_ack(test_case, skel);
+               if (!ASSERT_OK(err, "drop_ack"))
+                       goto close_servers;
+       }
+
+       /* Tie requests to the first four listeners */
+       err = start_clients(test_case);
+       if (!ASSERT_OK(err, "start_clients"))
+               goto close_clients;
+
+       err = listen(test_case->servers[MIGRATED_TO], QLEN);
+       if (!ASSERT_OK(err, "listen"))
+               goto close_clients;
+
+       err = update_maps(test_case, skel);
+       if (!ASSERT_OK(err, "fill_maps"))
+               goto close_clients;
+
+       /* Migrate the requests in the accept queue only.
+        * TCP_NEW_SYN_RECV requests are not migrated at this point.
+        */
+       err = migrate_dance(test_case);
+       if (!ASSERT_OK(err, "migrate_dance"))
+               goto close_clients;
+
+       if (test_case->expire_synack_timer) {
+               /* Wait for SYN+ACK timers to expire so that
+                * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
+                */
+               sleep(1);
+       }
+
+       if (test_case->link) {
+               /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
+               err = pass_ack(test_case);
+               if (!ASSERT_OK(err, "pass_ack"))
+                       goto close_clients;
+       }
+
+       count_requests(test_case, skel);
+
+close_clients:
+       close_fds(test_case->clients, NR_CLIENTS);
+
+       if (test_case->link) {
+               err = pass_ack(test_case);
+               ASSERT_OK(err, "pass_ack - clean up");
+       }
+
+close_servers:
+       close_fds(test_case->servers, NR_SERVERS);
+
+       if (test_case->fastopen) {
+               err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
+               ASSERT_OK(err, "setup_fastopen - restore");
+       }
+}
+
+void test_migrate_reuseport(void)
+{
+       struct test_migrate_reuseport *skel;
+       int i;
+
+       skel = test_migrate_reuseport__open_and_load();
+       if (!ASSERT_OK_PTR(skel, "open_and_load"))
+               return;
+
+       for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+               test__start_subtest(test_cases[i].name);
+               run_test(&test_cases[i], skel);
+       }
+
+       test_migrate_reuseport__destroy(skel);
+}
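The sk_reuseport program referenced above (progs.migrate_reuseport with reuseport_map and migrate_map) is outside this section; the following is a hypothetical condensed sketch of its selection logic, omitting the migrated_at_* counters the test reads back:

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
            __uint(max_entries, 5);
            __type(key, int);
            __type(value, __u64);
    } reuseport_map SEC(".maps");

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(max_entries, 5);
            __type(key, __u64);     /* cookie of a closing listener */
            __type(value, int);     /* index of the migration target */
    } migrate_map SEC(".maps");

    SEC("sk_reuseport/migrate")
    int migrate_reuseport(struct sk_reuseport_md *reuse_md)
    {
            __u64 cookie;
            int *target;

            if (!reuse_md->migrating_sk)    /* plain SYN: default selection */
                    return SK_PASS;

            cookie = bpf_get_socket_cookie(reuse_md->sk);
            target = bpf_map_lookup_elem(&migrate_map, &cookie);
            if (!target)
                    return SK_DROP;

            /* Steer the migrating request to reuseport_map[*target]; if the
             * call fails (e.g. the listener left the group), SK_PASS lets
             * the kernel fall back to its random pick, as the comments in
             * migrate_dance() anticipate.
             */
            bpf_sk_select_reuseport(reuse_md, &reuseport_map, target, 0);
            return SK_PASS;
    }

    char _license[] SEC("license") = "GPL";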
index e178416..6194b77 100644 (file)
@@ -38,13 +38,13 @@ void test_obj_name(void)
 
                fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
                CHECK((tests[i].success && fd < 0) ||
-                     (!tests[i].success && fd != -1) ||
+                     (!tests[i].success && fd >= 0) ||
                      (!tests[i].success && errno != tests[i].expected_errno),
                      "check-bpf-prog-name",
                      "fd %d(%d) errno %d(%d)\n",
                       fd, tests[i].success, errno, tests[i].expected_errno);
 
-               if (fd != -1)
+               if (fd >= 0)
                        close(fd);
 
                /* test different attr.map_name during BPF_MAP_CREATE */
@@ -59,13 +59,13 @@ void test_obj_name(void)
                memcpy(attr.map_name, tests[i].name, ncopy);
                fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
                CHECK((tests[i].success && fd < 0) ||
-                     (!tests[i].success && fd != -1) ||
+                     (!tests[i].success && fd >= 0) ||
                      (!tests[i].success && errno != tests[i].expected_errno),
                      "check-bpf-map-name",
                      "fd %d(%d) errno %d(%d)\n",
                      fd, tests[i].success, errno, tests[i].expected_errno);
 
-               if (fd != -1)
+               if (fd >= 0)
                        close(fd);
        }
 }
index e35c444..12c4f45 100644 (file)
@@ -74,7 +74,7 @@ static void test_perf_branches_common(int perf_fd,
 
        /* attach perf_event */
        link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
-       if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_perf_event"))
                goto out_destroy_skel;
 
        /* generate some branches on cpu 0 */
@@ -119,7 +119,7 @@ static void test_perf_branches_hw(void)
         * Some setups don't support branch records (virtual machines, !x86),
         * so skip test in this case.
         */
-       if (pfd == -1) {
+       if (pfd < 0) {
                if (errno == ENOENT || errno == EOPNOTSUPP) {
                        printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
                               __func__);
index ca9f089..6490e96 100644 (file)
@@ -80,7 +80,7 @@ void test_perf_buffer(void)
        pb_opts.sample_cb = on_sample;
        pb_opts.ctx = &cpu_seen;
        pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1, &pb_opts);
-       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+       if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto out_close;
 
        CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
index 72c3690..33144c9 100644 (file)
@@ -97,8 +97,7 @@ void test_perf_event_stackmap(void)
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
-                 "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+       if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
                close(pmu_fd);
                goto cleanup;
        }
index 7aecfd9..95bd120 100644 (file)
@@ -15,7 +15,7 @@ void test_probe_user(void)
        static const int zero = 0;
 
        obj = bpf_object__open_file(obj_file, &opts);
-       if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open_file"))
                return;
 
        kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
@@ -33,11 +33,8 @@ void test_probe_user(void)
                goto cleanup;
 
        kprobe_link = bpf_program__attach(kprobe_prog);
-       if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
-                 "err %ld\n", PTR_ERR(kprobe_link))) {
-               kprobe_link = NULL;
+       if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
                goto cleanup;
-       }
 
        memset(&curr, 0, sizeof(curr));
        in->sin_family = AF_INET;
index 131d7f7..89fc98f 100644 (file)
@@ -46,7 +46,7 @@ void test_prog_run_xattr(void)
        tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
 
        err = bpf_prog_test_run_xattr(&tattr);
-       CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
+       CHECK_ATTR(err >= 0 || errno != ENOSPC || tattr.retval, "run",
              "err %d errno %d retval %d\n", err, errno, tattr.retval);
 
        CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
@@ -78,6 +78,6 @@ void test_prog_run_xattr(void)
 cleanup:
        if (skel)
                test_pkt_access__destroy(skel);
-       if (stats_fd != -1)
+       if (stats_fd >= 0)
                close(stats_fd);
 }
index c5fb191..41720a6 100644 (file)
@@ -77,7 +77,7 @@ void test_raw_tp_test_run(void)
        /* invalid cpu ID should fail with ENXIO */
        opts.cpu = 0xffffffff;
        err = bpf_prog_test_run_opts(prog_fd, &opts);
-       CHECK(err != -1 || errno != ENXIO,
+       CHECK(err >= 0 || errno != ENXIO,
              "test_run_opts_fail",
              "should failed with ENXIO\n");
 
@@ -85,7 +85,7 @@ void test_raw_tp_test_run(void)
        opts.cpu = 1;
        opts.flags = 0;
        err = bpf_prog_test_run_opts(prog_fd, &opts);
-       CHECK(err != -1 || errno != EINVAL,
+       CHECK(err >= 0 || errno != EINVAL,
              "test_run_opts_fail",
              "should failed with EINVAL\n");
 
index 563e121..5f9eaa3 100644 (file)
@@ -30,7 +30,7 @@ void test_rdonly_maps(void)
        struct bss bss;
 
        obj = bpf_object__open_file(file, NULL);
-       if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open"))
                return;
 
        err = bpf_object__load(obj);
@@ -58,11 +58,8 @@ void test_rdonly_maps(void)
                        goto cleanup;
 
                link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
-               if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
-                         t->prog_name, PTR_ERR(link))) {
-                       link = NULL;
+               if (!ASSERT_OK_PTR(link, "attach_prog"))
                        goto cleanup;
-               }
 
                /* trigger probe */
                usleep(1);
index ac1ee10..de26881 100644 (file)
@@ -15,7 +15,7 @@ void test_reference_tracking(void)
        int err = 0;
 
        obj = bpf_object__open_file(file, &open_opts);
-       if (CHECK_FAIL(IS_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open_file"))
                return;
 
        if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
index d3c2de2..f623613 100644 (file)
@@ -76,7 +76,7 @@ __resolve_symbol(struct btf *btf, int type_id)
        }
 
        for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
-               if (test_symbols[i].id != -1)
+               if (test_symbols[i].id >= 0)
                        continue;
 
                if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
index cef63e7..167cd8a 100644 (file)
@@ -63,7 +63,7 @@ void test_ringbuf_multi(void)
                goto cleanup;
 
        proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
-       if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+       if (CHECK(proto_fd < 0, "bpf_create_map", "bpf_create_map failed\n"))
                goto cleanup;
 
        err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
index 821b414..4efd337 100644 (file)
@@ -78,7 +78,7 @@ static int create_maps(enum bpf_map_type inner_type)
        attr.max_entries = REUSEPORT_ARRAY_SIZE;
 
        reuseport_array = bpf_create_map_xattr(&attr);
-       RET_ERR(reuseport_array == -1, "creating reuseport_array",
+       RET_ERR(reuseport_array < 0, "creating reuseport_array",
                "reuseport_array:%d errno:%d\n", reuseport_array, errno);
 
        /* Creating outer_map */
@@ -89,7 +89,7 @@ static int create_maps(enum bpf_map_type inner_type)
        attr.max_entries = 1;
        attr.inner_map_fd = reuseport_array;
        outer_map = bpf_create_map_xattr(&attr);
-       RET_ERR(outer_map == -1, "creating outer_map",
+       RET_ERR(outer_map < 0, "creating outer_map",
                "outer_map:%d errno:%d\n", outer_map, errno);
 
        return 0;
@@ -102,8 +102,9 @@ static int prepare_bpf_obj(void)
        int err;
 
        obj = bpf_object__open("test_select_reuseport_kern.o");
-       RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
-               "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+       err = libbpf_get_error(obj);
+       RET_ERR(err, "open test_select_reuseport_kern.o",
+               "obj:%p PTR_ERR(obj):%d\n", obj, err);
 
        map = bpf_object__find_map_by_name(obj, "outer_map");
        RET_ERR(!map, "find outer_map", "!map\n");
@@ -116,31 +117,31 @@ static int prepare_bpf_obj(void)
        prog = bpf_program__next(NULL, obj);
        RET_ERR(!prog, "get first bpf_program", "!prog\n");
        select_by_skb_data_prog = bpf_program__fd(prog);
-       RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+       RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
                "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
 
        map = bpf_object__find_map_by_name(obj, "result_map");
        RET_ERR(!map, "find result_map", "!map\n");
        result_map = bpf_map__fd(map);
-       RET_ERR(result_map == -1, "get result_map fd",
+       RET_ERR(result_map < 0, "get result_map fd",
                "result_map:%d\n", result_map);
 
        map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
        RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
        tmp_index_ovr_map = bpf_map__fd(map);
-       RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+       RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
                "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
 
        map = bpf_object__find_map_by_name(obj, "linum_map");
        RET_ERR(!map, "find linum_map", "!map\n");
        linum_map = bpf_map__fd(map);
-       RET_ERR(linum_map == -1, "get linum_map fd",
+       RET_ERR(linum_map < 0, "get linum_map fd",
                "linum_map:%d\n", linum_map);
 
        map = bpf_object__find_map_by_name(obj, "data_check_map");
        RET_ERR(!map, "find data_check_map", "!map\n");
        data_check_map = bpf_map__fd(map);
-       RET_ERR(data_check_map == -1, "get data_check_map fd",
+       RET_ERR(data_check_map < 0, "get data_check_map fd",
                "data_check_map:%d\n", data_check_map);
 
        return 0;
@@ -237,7 +238,7 @@ static long get_linum(void)
        int err;
 
        err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
-       RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+       RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
                err, errno);
 
        return linum;
@@ -254,11 +255,11 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
        addrlen = sizeof(cli_sa);
        err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
                          &addrlen);
-       RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+       RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
               err, errno);
 
        err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
-       RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+       RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
               err, errno);
 
        if (type == SOCK_STREAM) {
@@ -347,7 +348,7 @@ static void check_results(void)
 
        for (i = 0; i < NR_RESULTS; i++) {
                err = bpf_map_lookup_elem(result_map, &i, &results[i]);
-               RET_IF(err == -1, "lookup_elem(result_map)",
+               RET_IF(err < 0, "lookup_elem(result_map)",
                       "i:%u err:%d errno:%d\n", i, err, errno);
        }
 
@@ -524,12 +525,12 @@ static void test_syncookie(int type, sa_family_t family)
         */
        err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
                                  &tmp_index, BPF_ANY);
-       RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+       RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
               "err:%d errno:%d\n", err, errno);
        do_test(type, family, &cmd, PASS);
        err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
                                  &tmp_index);
-       RET_IF(err == -1 || tmp_index != -1,
+       RET_IF(err < 0 || tmp_index >= 0,
               "lookup_elem(tmp_index_ovr_map)",
               "err:%d errno:%d tmp_index:%d\n",
               err, errno, tmp_index);
@@ -569,7 +570,7 @@ static void test_detach_bpf(int type, sa_family_t family)
 
        for (i = 0; i < NR_RESULTS; i++) {
                err = bpf_map_lookup_elem(result_map, &i, &tmp);
-               RET_IF(err == -1, "lookup_elem(result_map)",
+               RET_IF(err < 0, "lookup_elem(result_map)",
                       "i:%u err:%d errno:%d\n", i, err, errno);
                nr_run_before += tmp;
        }
@@ -584,7 +585,7 @@ static void test_detach_bpf(int type, sa_family_t family)
 
        for (i = 0; i < NR_RESULTS; i++) {
                err = bpf_map_lookup_elem(result_map, &i, &tmp);
-               RET_IF(err == -1, "lookup_elem(result_map)",
+               RET_IF(err < 0, "lookup_elem(result_map)",
                       "i:%u err:%d errno:%d\n", i, err, errno);
                nr_run_after += tmp;
        }
@@ -632,24 +633,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
                                         SO_ATTACH_REUSEPORT_EBPF,
                                         &select_by_skb_data_prog,
                                         sizeof(select_by_skb_data_prog));
-                       RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+                       RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
                               "err:%d errno:%d\n", err, errno);
                }
 
                err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
-               RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+               RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
                       i, err, errno);
 
                if (type == SOCK_STREAM) {
                        err = listen(sk_fds[i], 10);
-                       RET_IF(err == -1, "listen()",
+                       RET_IF(err < 0, "listen()",
                               "sk_fds[%d] err:%d errno:%d\n",
                               i, err, errno);
                }
 
                err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
                                          BPF_NOEXIST);
-               RET_IF(err == -1, "update_elem(reuseport_array)",
+               RET_IF(err < 0, "update_elem(reuseport_array)",
                       "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
 
                if (i == first) {
@@ -682,7 +683,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
        prepare_sk_fds(type, family, inany);
        err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
                                  BPF_ANY);
-       RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+       RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
               "err:%d errno:%d\n", err, errno);
 
        /* Install reuseport_array to outer_map? */
@@ -691,7 +692,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
 
        err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
                                  BPF_ANY);
-       RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+       RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
               "err:%d errno:%d\n", err, errno);
 }
 
@@ -720,18 +721,18 @@ static void cleanup_per_test(bool no_inner_map)
                return;
 
        err = bpf_map_delete_elem(outer_map, &index_zero);
-       RET_IF(err == -1, "delete_elem(outer_map)",
+       RET_IF(err < 0, "delete_elem(outer_map)",
               "err:%d errno:%d\n", err, errno);
 }
 
 static void cleanup(void)
 {
-       if (outer_map != -1) {
+       if (outer_map >= 0) {
                close(outer_map);
                outer_map = -1;
        }
 
-       if (reuseport_array != -1) {
+       if (reuseport_array >= 0) {
                close(reuseport_array);
                reuseport_array = -1;
        }
index a1eade5..023cc53 100644 (file)
@@ -91,8 +91,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
 
                skel->links.send_signal_perf =
                        bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
-               if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
-                         "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+               if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event"))
                        goto disable_pmu;
        }
 
index 45c82db..aee4154 100644 (file)
@@ -480,7 +480,7 @@ static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
        }
 
        link = bpf_program__attach_netns(prog, net_fd);
-       if (CHECK(IS_ERR(link), "bpf_program__attach_netns", "failed\n")) {
+       if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) {
                errno = -PTR_ERR(link);
                log_err("failed to attach program '%s' to netns",
                        bpf_program__name(prog));
index af87118..577d619 100644 (file)
@@ -97,12 +97,12 @@ static void check_result(void)
 
        err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
                                  &egress_linum);
-       CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+       CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
              "err:%d errno:%d\n", err, errno);
 
        err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
                                  &ingress_linum);
-       CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+       CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
              "err:%d errno:%d\n", err, errno);
 
        memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
@@ -355,14 +355,12 @@ void test_sock_fields(void)
 
        egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
                                                 child_cg_fd);
-       if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
-                 PTR_ERR(egress_link)))
+       if (!ASSERT_OK_PTR(egress_link, "attach_cgroup(egress)"))
                goto done;
 
        ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
                                                  child_cg_fd);
-       if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
-                 PTR_ERR(ingress_link)))
+       if (!ASSERT_OK_PTR(ingress_link, "attach_cgroup(ingress)"))
                goto done;
 
        linum_map_fd = bpf_map__fd(skel->maps.linum_map);
@@ -375,8 +373,8 @@ done:
        bpf_link__destroy(egress_link);
        bpf_link__destroy(ingress_link);
        test_sock_fields__destroy(skel);
-       if (child_cg_fd != -1)
+       if (child_cg_fd >= 0)
                close(child_cg_fd);
-       if (parent_cg_fd != -1)
+       if (parent_cg_fd >= 0)
                close(parent_cg_fd);
 }
index ab77596..1352ec1 100644 (file)
@@ -88,11 +88,11 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
        int s, map, err;
 
        s = connected_socket_v4();
-       if (CHECK_FAIL(s == -1))
+       if (CHECK_FAIL(s < 0))
                return;
 
        map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
-       if (CHECK_FAIL(map == -1)) {
+       if (CHECK_FAIL(map < 0)) {
                perror("bpf_create_map");
                goto out;
        }
@@ -245,7 +245,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
        opts.link_info = &linfo;
        opts.link_info_len = sizeof(linfo);
        link = bpf_program__attach_iter(skel->progs.copy, &opts);
-       if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+       if (!ASSERT_OK_PTR(link, "attach_iter"))
                goto out;
 
        iter_fd = bpf_iter_create(bpf_link__fd(link));
@@ -304,7 +304,7 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
        }
 
        err = bpf_prog_attach(verdict, map, second, 0);
-       assert(err == -1 && errno == EBUSY);
+       ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
 
        err = bpf_prog_detach2(verdict, map, first);
        if (CHECK_FAIL(err)) {
index 06b86ad..7a0d64f 100644 (file)
@@ -98,7 +98,7 @@ static void run_tests(int family, enum bpf_map_type map_type)
        int map;
 
        map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
-       if (CHECK_FAIL(map == -1)) {
+       if (CHECK_FAIL(map < 0)) {
                perror("bpf_map_create");
                return;
        }
index 648d9ae..0f066b8 100644 (file)
 #define xbpf_map_delete_elem(fd, key)                                          \
        ({                                                                     \
                int __ret = bpf_map_delete_elem((fd), (key));                  \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("map_delete");                              \
                __ret;                                                         \
        })
 #define xbpf_map_lookup_elem(fd, key, val)                                     \
        ({                                                                     \
                int __ret = bpf_map_lookup_elem((fd), (key), (val));           \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("map_lookup");                              \
                __ret;                                                         \
        })
 #define xbpf_map_update_elem(fd, key, val, flags)                              \
        ({                                                                     \
                int __ret = bpf_map_update_elem((fd), (key), (val), (flags));  \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("map_update");                              \
                __ret;                                                         \
        })
        ({                                                                     \
                int __ret =                                                    \
                        bpf_prog_attach((prog), (target), (type), (flags));    \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("prog_attach(" #type ")");                  \
                __ret;                                                         \
        })
 #define xbpf_prog_detach2(prog, target, type)                                  \
        ({                                                                     \
                int __ret = bpf_prog_detach2((prog), (target), (type));        \
-               if (__ret == -1)                                               \
+               if (__ret < 0)                                                 \
                        FAIL_ERRNO("prog_detach2(" #type ")");                 \
                __ret;                                                         \
        })
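
The == -1 to < 0 conversions in these selftests track libbpf's new error-reporting convention: in strict mode, low-level APIs may return the negative error code directly (for example -EBUSY) instead of -1 with the code only in errno, and err < 0 catches both. A minimal sketch of the resulting pattern, assuming a map fd obtained elsewhere (map_update_checked() is a hypothetical helper, not part of this series):

#include <errno.h>
#include <bpf/bpf.h>

/* Works under both conventions: err < 0 matches -1 as well as a
 * direct -Exxx return, and errno is set either way.
 */
static int map_update_checked(int map_fd, const void *key, const void *val)
{
	int err = bpf_map_update_elem(map_fd, key, val, BPF_ANY);

	return err < 0 ? -errno : 0;
}
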
index 11a769e..0a91d8d 100644 (file)
@@ -62,8 +62,7 @@ retry:
 
        skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
                                                           pmu_fd);
-       if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
-                 "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+       if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
                close(pmu_fd);
                goto cleanup;
        }
index 37269d2..04b476b 100644 (file)
@@ -21,7 +21,7 @@ void test_stacktrace_map(void)
                goto close_prog;
 
        link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
-       if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_tp"))
                goto close_prog;
 
        /* find map fds */
index 404a549..4fd30bb 100644 (file)
@@ -21,7 +21,7 @@ void test_stacktrace_map_raw_tp(void)
                goto close_prog;
 
        link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
-       if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                goto close_prog;
 
        /* find map fds */
@@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void)
                goto close_prog;
 
 close_prog:
-       if (!IS_ERR_OR_NULL(link))
-               bpf_link__destroy(link);
+       bpf_link__destroy(link);
        bpf_object__close(obj);
 }
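
The ASSERT_OK_PTR() conversions rely on the companion libbpf change where pointer-returning APIs yield NULL on failure, with the error code in errno, rather than an ERR_PTR()-encoded value; bpf_link__destroy(NULL) is likewise a safe no-op, which is why the IS_ERR_OR_NULL() guard above can go away. A minimal sketch under those semantics (attach_tp_checked() is a hypothetical helper):

#include <errno.h>
#include <stdio.h>
#include <bpf/libbpf.h>

/* Hypothetical helper around the NULL-on-error convention. */
static struct bpf_link *attach_tp_checked(struct bpf_program *prog)
{
	struct bpf_link *link;

	link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
	if (!link)	/* no IS_ERR() needed; error code is in errno */
		fprintf(stderr, "attach failed: %d\n", -errno);
	return link;	/* bpf_link__destroy(NULL) is a no-op */
}
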
index 08d19ca..1fa7720 100644 (file)
@@ -353,8 +353,7 @@ static void fastopen_estab(void)
                return;
 
        link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, true)) {
@@ -398,8 +397,7 @@ static void syncookie_estab(void)
                return;
 
        link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, false)) {
@@ -431,8 +429,7 @@ static void fin(void)
                return;
 
        link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, false)) {
@@ -471,8 +468,7 @@ static void __simple_estab(bool exprm)
                return;
 
        link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, false)) {
@@ -509,8 +505,7 @@ static void misc(void)
                return;
 
        link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
-       if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
-                 PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)"))
                return;
 
        if (sk_fds_connect(&sk_fds, false)) {
index 9966685..123c68c 100644 (file)
@@ -73,7 +73,7 @@ void test_test_overhead(void)
                return;
 
        obj = bpf_object__open_file("./test_overhead.o", NULL);
-       if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+       if (!ASSERT_OK_PTR(obj, "obj_open_file"))
                return;
 
        kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
@@ -108,7 +108,7 @@ void test_test_overhead(void)
        /* attach kprobe */
        link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
                                          kprobe_func);
-       if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_kprobe"))
                goto cleanup;
        test_run("kprobe");
        bpf_link__destroy(link);
@@ -116,28 +116,28 @@ void test_test_overhead(void)
        /* attach kretprobe */
        link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
                                          kprobe_func);
-       if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_kretprobe"))
                goto cleanup;
        test_run("kretprobe");
        bpf_link__destroy(link);
 
        /* attach raw_tp */
        link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
-       if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
                goto cleanup;
        test_run("raw_tp");
        bpf_link__destroy(link);
 
        /* attach fentry */
        link = bpf_program__attach_trace(fentry_prog);
-       if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_fentry"))
                goto cleanup;
        test_run("fentry");
        bpf_link__destroy(link);
 
        /* attach fexit */
        link = bpf_program__attach_trace(fexit_prog);
-       if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "attach_fexit"))
                goto cleanup;
        test_run("fexit");
        bpf_link__destroy(link);
index f3022d9..d7f5a93 100644 (file)
@@ -55,7 +55,7 @@ void test_trampoline_count(void)
        /* attach 'allowed' trampoline programs */
        for (i = 0; i < MAX_TRAMP_PROGS; i++) {
                obj = bpf_object__open_file(object, NULL);
-               if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+               if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
                        obj = NULL;
                        goto cleanup;
                }
@@ -68,14 +68,14 @@ void test_trampoline_count(void)
 
                if (rand() % 2) {
                        link = load(inst[i].obj, fentry_name);
-                       if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+                       if (!ASSERT_OK_PTR(link, "attach_prog")) {
                                link = NULL;
                                goto cleanup;
                        }
                        inst[i].link_fentry = link;
                } else {
                        link = load(inst[i].obj, fexit_name);
-                       if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+                       if (!ASSERT_OK_PTR(link, "attach_prog")) {
                                link = NULL;
                                goto cleanup;
                        }
@@ -85,7 +85,7 @@ void test_trampoline_count(void)
 
        /* and try 1 extra.. */
        obj = bpf_object__open_file(object, NULL);
-       if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+       if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
                obj = NULL;
                goto cleanup;
        }
@@ -96,13 +96,15 @@ void test_trampoline_count(void)
 
        /* ..that needs to fail */
        link = load(obj, fentry_name);
-       if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
+       err = libbpf_get_error(link);
+       if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) {
                bpf_link__destroy(link);
                goto cleanup_extra;
        }
 
        /* with E2BIG error */
-       CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+       ASSERT_EQ(err, -E2BIG, "proper error check");
+       ASSERT_EQ(link, NULL, "ptr_is_null");
 
        /* and finally execute the probe */
        if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
index 2aba09d..56c9d6b 100644 (file)
@@ -22,11 +22,10 @@ void test_udp_limit(void)
                goto close_cgroup_fd;
 
        skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+       if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock"))
+               goto close_skeleton;
        skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
-       if (CHECK(IS_ERR(skel->links.sock) || IS_ERR(skel->links.sock_release),
-                 "cg-attach", "sock %ld sock_release %ld",
-                 PTR_ERR(skel->links.sock),
-                 PTR_ERR(skel->links.sock_release)))
+       if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release"))
                goto close_skeleton;
 
        /* BPF program enforces a single UDP socket per cgroup,
index 2c6c570..3bd5904 100644 (file)
@@ -90,7 +90,7 @@ void test_xdp_bpf2bpf(void)
        pb_opts.ctx = &passed;
        pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
                              1, &pb_opts);
-       if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+       if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
                goto out;
 
        /* Run test program */
index 6f81499..46eed0a 100644 (file)
@@ -51,7 +51,7 @@ void test_xdp_link(void)
 
        /* BPF link is not allowed to replace prog attachment */
        link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
-       if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+       if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
                bpf_link__destroy(link);
                /* best-effort detach prog */
                opts.old_fd = prog_fd1;
@@ -67,7 +67,7 @@ void test_xdp_link(void)
 
        /* now BPF link should attach successfully */
        link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
-       if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_attach"))
                goto cleanup;
        skel1->links.xdp_handler = link;
 
@@ -95,7 +95,7 @@ void test_xdp_link(void)
 
        /* BPF link is not allowed to replace another BPF link */
        link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
-       if (CHECK(!IS_ERR(link), "link_attach_fail", "unexpected success\n")) {
+       if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
                bpf_link__destroy(link);
                goto cleanup;
        }
@@ -105,7 +105,7 @@ void test_xdp_link(void)
 
        /* new link attach should succeed */
        link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
-       if (CHECK(IS_ERR(link), "link_attach", "failed: %ld\n", PTR_ERR(link)))
+       if (!ASSERT_OK_PTR(link, "link_attach"))
                goto cleanup;
        skel2->links.xdp_handler = link;
 
index 6dfce3f..0aa3cd3 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index b83b5d2..6c39e86 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index d58d9f1..784a610 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 95989f4..a28e51e 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index b7f32c1..c86b93f 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index a1ddc36..bca8b88 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020, Oracle and/or its affiliates. */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
 #include <errno.h>
index b2f7c7c..6e7b400 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 43c36f5..f2b8167 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 11d1aa3..4ea6a37 100644 (file)
@@ -2,7 +2,6 @@
 /* Copyright (c) 2020 Facebook */
 #include "bpf_iter.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
index 54380c5..2e4775c 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
index b4fbddf..943f7bb 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
index f258583..cf0c485 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
index 65f93bb..5031e21 100644 (file)
@@ -3,7 +3,6 @@
 #include "bpf_iter.h"
 #include "bpf_tracing_net.h"
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <bpf/bpf_endian.h>
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644 (file)
index 0000000..3a193f4
--- /dev/null
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 set_pid = 0;
+__u64 set_key = 0;
+__u64 set_value = 0;
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 2);
+       __type(key, __u64);
+       __type(value, __u64);
+} hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+       if (set_pid == bpf_get_current_pid_tgid() >> 32)
+               bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+       return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
new file mode 100644 (file)
index 0000000..27df571
--- /dev/null
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ *   1. If reuse_md->migrating_sk is NULL (SYN packet),
+ *        return SK_PASS without selecting a listener.
+ *   2. If reuse_md->migrating_sk is not NULL (socket migration),
+ *        select a listener (reuseport_map[migrate_map[cookie]])
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+       __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+       __uint(max_entries, 256);
+       __type(key, int);
+       __type(value, __u64);
+} reuseport_map SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(max_entries, 256);
+       __type(key, __u64);
+       __type(value, int);
+} migrate_map SEC(".maps");
+
+int migrated_at_close = 0;
+int migrated_at_close_fastopen = 0;
+int migrated_at_send_synack = 0;
+int migrated_at_recv_ack = 0;
+__be16 server_port;
+
+SEC("xdp")
+int drop_ack(struct xdp_md *xdp)
+{
+       void *data_end = (void *)(long)xdp->data_end;
+       void *data = (void *)(long)xdp->data;
+       struct ethhdr *eth = data;
+       struct tcphdr *tcp = NULL;
+
+       if (eth + 1 > data_end)
+               goto pass;
+
+       switch (bpf_ntohs(eth->h_proto)) {
+       case ETH_P_IP: {
+               struct iphdr *ip = (struct iphdr *)(eth + 1);
+
+               if (ip + 1 > data_end)
+                       goto pass;
+
+               if (ip->protocol != IPPROTO_TCP)
+                       goto pass;
+
+               tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4);
+               break;
+       }
+       case ETH_P_IPV6: {
+               struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1);
+
+               if (ipv6 + 1 > data_end)
+                       goto pass;
+
+               if (ipv6->nexthdr != IPPROTO_TCP)
+                       goto pass;
+
+               tcp = (struct tcphdr *)(ipv6 + 1);
+               break;
+       }
+       default:
+               goto pass;
+       }
+
+       if (tcp + 1 > data_end)
+               goto pass;
+
+       if (tcp->dest != server_port)
+               goto pass;
+
+       if (!tcp->syn && tcp->ack)
+               return XDP_DROP;
+
+pass:
+       return XDP_PASS;
+}
+
+SEC("sk_reuseport/migrate")
+int migrate_reuseport(struct sk_reuseport_md *reuse_md)
+{
+       int *key, flags = 0, state, err;
+       __u64 cookie;
+
+       if (!reuse_md->migrating_sk)
+               return SK_PASS;
+
+       state = reuse_md->migrating_sk->state;
+       cookie = bpf_get_socket_cookie(reuse_md->sk);
+
+       key = bpf_map_lookup_elem(&migrate_map, &cookie);
+       if (!key)
+               return SK_DROP;
+
+       err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags);
+       if (err)
+               return SK_PASS;
+
+       switch (state) {
+       case BPF_TCP_ESTABLISHED:
+               __sync_fetch_and_add(&migrated_at_close, 1);
+               break;
+       case BPF_TCP_SYN_RECV:
+               __sync_fetch_and_add(&migrated_at_close_fastopen, 1);
+               break;
+       case BPF_TCP_NEW_SYN_RECV:
+               if (!reuse_md->len)
+                       __sync_fetch_and_add(&migrated_at_send_synack, 1);
+               else
+                       __sync_fetch_and_add(&migrated_at_recv_ack, 1);
+               break;
+       }
+
+       return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
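
To exercise this, user space attaches the sk_reuseport program to one socket of the reuseport group via SO_ATTACH_REUSEPORT_EBPF and enables the net.ipv4.tcp_migrate_req sysctl added by this series, so that child sockets actually get migrated. A minimal sketch of the attach step (attach_migrate_prog() is a hypothetical helper; lsk is a listening socket already bound with SO_REUSEPORT):

#include <errno.h>
#include <sys/socket.h>

#ifndef SO_ATTACH_REUSEPORT_EBPF
#define SO_ATTACH_REUSEPORT_EBPF 52	/* from asm-generic/socket.h */
#endif

/* Hypothetical helper: attach the BPF prog fd to the reuseport group. */
static int attach_migrate_prog(int lsk, int prog_fd)
{
	if (setsockopt(lsk, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
		       &prog_fd, sizeof(prog_fd)) < 0)
		return -errno;
	return 0;
}
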
index e35129b..e2ad261 100644 (file)
@@ -3,7 +3,6 @@
 
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 
 __u32 pid = 0;
 
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
new file mode 100644 (file)
index 0000000..880debc
--- /dev/null
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <string.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* One map uses a devmap, the other a devmap_hash, to test both types */
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(int));
+       __uint(max_entries, 1024);
+} map_all SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 128);
+} map_egress SEC(".maps");
+
+/* map to store the egress interfaces' MAC addresses */
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __type(key, __u32);
+       __type(value, __be64);
+       __uint(max_entries, 128);
+} mac_map SEC(".maps");
+
+SEC("xdp_redirect_map_multi")
+int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       int if_index = ctx->ingress_ifindex;
+       struct ethhdr *eth = data;
+       __u16 h_proto;
+       __u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       h_proto = eth->h_proto;
+
+       /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
+       if (h_proto == bpf_htons(ETH_P_IP))
+               return bpf_redirect_map(&map_all, 0,
+                                       BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+       /* Using IPv6 to test the no-flags case */
+       else if (h_proto == bpf_htons(ETH_P_IPV6))
+               return bpf_redirect_map(&map_all, if_index, 0);
+       /* All others for BPF_F_BROADCAST testing */
+       else
+               return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+}
+
+/* The following 2 progs are for 2nd devmap prog testing */
+SEC("xdp_redirect_map_ingress")
+int xdp_redirect_map_all_prog(struct xdp_md *ctx)
+{
+       return bpf_redirect_map(&map_egress, 0,
+                               BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp_devmap/map_prog")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       __u32 key = ctx->egress_ifindex;
+       struct ethhdr *eth = data;
+       __u64 nh_off;
+       __be64 *mac;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       mac = bpf_map_lookup_elem(&mac_map, &key);
+       if (mac)
+               __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
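
With BPF_F_BROADCAST the ifindex/key argument of bpf_redirect_map() is ignored and the frame is forwarded to every interface in the map, minus the ingress device when BPF_F_EXCLUDE_INGRESS is also set, so user space only has to populate map_all with the egress ifindexes. A minimal sketch, assuming map_all_fd comes from the loaded object (add_egress_dev() is a hypothetical helper):

#include <errno.h>
#include <bpf/bpf.h>

/* Hypothetical helper: add one egress ifindex to the devmap. */
static int add_egress_dev(int map_all_fd, int slot, int ifindex)
{
	int err = bpf_map_update_elem(map_all_fd, &slot, &ifindex, BPF_ANY);

	return err < 0 ? -errno : 0;
}
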
index 7eb940a..ed12111 100755 (executable)
@@ -1,5 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+set -e
 
 # Assume script is located under tools/testing/selftests/bpf/. We want to start
 # build attempts from the top of kernel repository.
index 6a5349f..7e9049f 100644 (file)
@@ -231,6 +231,14 @@ static void test_lru_sanity0(int map_type, int map_flags)
        assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
               errno == ENOENT);
 
+       /* lookup elem key=1 and delete it, then check it doesn't exist */
+       key = 1;
+       assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value));
+       assert(value[0] == 1234);
+
+       /* remove the same element from the expected map */
+       assert(!bpf_map_delete_elem(expected_map_fd, &key));
+
        assert(map_equal(lru_map_fd, expected_map_fd));
 
        close(expected_map_fd);
index 51adc42..30cbf5d 100644 (file)
@@ -53,23 +53,30 @@ static void test_hashmap(unsigned int task, void *data)
 
        value = 0;
        /* BPF_NOEXIST means add new element if it doesn't exist. */
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               /* key=1 already exists. */
               errno == EEXIST);
 
        /* -1 is an invalid flag. */
-       assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 &&
               errno == EINVAL);
 
        /* Check that key=1 can be found. */
        assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
 
        key = 2;
+       value = 1234;
+       /* Insert key=2 element. */
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+       /* Check that key=2 matches the value and delete it */
+       assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
        /* Check that key=2 is not found. */
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
 
        /* BPF_EXIST means update existing element. */
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
               /* key=2 is not there. */
               errno == ENOENT);
 
@@ -80,7 +87,7 @@ static void test_hashmap(unsigned int task, void *data)
         * inserted due to max_entries limit.
         */
        key = 0;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == E2BIG);
 
        /* Update existing element, though the map is full. */
@@ -89,12 +96,12 @@ static void test_hashmap(unsigned int task, void *data)
        key = 2;
        assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
        key = 3;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == E2BIG);
 
        /* Check that key = 0 doesn't exist. */
        key = 0;
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
        /* Iterate over two elements. */
        assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -104,7 +111,7 @@ static void test_hashmap(unsigned int task, void *data)
        assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
               (next_key == 1 || next_key == 2) &&
               (next_key != first_key));
-       assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
               errno == ENOENT);
 
        /* Delete both elements. */
@@ -112,13 +119,13 @@ static void test_hashmap(unsigned int task, void *data)
        assert(bpf_map_delete_elem(fd, &key) == 0);
        key = 2;
        assert(bpf_map_delete_elem(fd, &key) == 0);
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
        key = 0;
        /* Check that map is empty. */
-       assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
               errno == ENOENT);
-       assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
               errno == ENOENT);
 
        close(fd);
@@ -166,15 +173,25 @@ static void test_hashmap_percpu(unsigned int task, void *data)
        /* Insert key=1 element. */
        assert(!(expected_key_mask & key));
        assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+
+       /* Lookup and delete elem key=1 and check value. */
+       assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 &&
+              bpf_percpu(value, 0) == 100);
+
+       for (i = 0; i < nr_cpus; i++)
+               bpf_percpu(value, i) = i + 100;
+
+       /* Insert key=1 element which should not exist. */
+       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
        expected_key_mask |= key;
 
        /* BPF_NOEXIST means add new element if it doesn't exist. */
-       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
               /* key=1 already exists. */
               errno == EEXIST);
 
        /* -1 is an invalid flag. */
-       assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, value, -1) < 0 &&
               errno == EINVAL);
 
        /* Check that key=1 can be found. Value could be 0 if the lookup
@@ -186,10 +203,10 @@ static void test_hashmap_percpu(unsigned int task, void *data)
 
        key = 2;
        /* Check that key=2 is not found. */
-       assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT);
 
        /* BPF_EXIST means update existing element. */
-       assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 &&
               /* key=2 is not there. */
               errno == ENOENT);
 
@@ -202,11 +219,11 @@ static void test_hashmap_percpu(unsigned int task, void *data)
         * inserted due to max_entries limit.
         */
        key = 0;
-       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
               errno == E2BIG);
 
        /* Check that key = 0 doesn't exist. */
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
        /* Iterate over two elements. */
        assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -237,13 +254,13 @@ static void test_hashmap_percpu(unsigned int task, void *data)
        assert(bpf_map_delete_elem(fd, &key) == 0);
        key = 2;
        assert(bpf_map_delete_elem(fd, &key) == 0);
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
 
        key = 0;
        /* Check that map is empty. */
-       assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
               errno == ENOENT);
-       assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
               errno == ENOENT);
 
        close(fd);
@@ -360,7 +377,7 @@ static void test_arraymap(unsigned int task, void *data)
        assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
        value = 0;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == EEXIST);
 
        /* Check that key=1 can be found. */
@@ -374,11 +391,11 @@ static void test_arraymap(unsigned int task, void *data)
         * due to max_entries limit.
         */
        key = 2;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
               errno == E2BIG);
 
        /* Check that key = 2 doesn't exist. */
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
 
        /* Iterate over two elements. */
        assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -387,12 +404,12 @@ static void test_arraymap(unsigned int task, void *data)
               next_key == 0);
        assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
               next_key == 1);
-       assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
               errno == ENOENT);
 
        /* Delete shouldn't succeed. */
        key = 1;
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
 
        close(fd);
 }
@@ -418,7 +435,7 @@ static void test_arraymap_percpu(unsigned int task, void *data)
        assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
 
        bpf_percpu(values, 0) = 0;
-       assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 &&
               errno == EEXIST);
 
        /* Check that key=1 can be found. */
@@ -433,11 +450,11 @@ static void test_arraymap_percpu(unsigned int task, void *data)
 
        /* Check that key=2 cannot be inserted due to max_entries limit. */
        key = 2;
-       assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 &&
               errno == E2BIG);
 
        /* Check that key = 2 doesn't exist. */
-       assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT);
 
        /* Iterate over two elements. */
        assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -446,12 +463,12 @@ static void test_arraymap_percpu(unsigned int task, void *data)
               next_key == 0);
        assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
               next_key == 1);
-       assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+       assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
               errno == ENOENT);
 
        /* Delete shouldn't succeed. */
        key = 1;
-       assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+       assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
 
        close(fd);
 }
@@ -555,7 +572,7 @@ static void test_queuemap(unsigned int task, void *data)
                assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
 
        /* Check that element cannot be pushed due to max_entries limit */
-       assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+       assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
               errno == E2BIG);
 
        /* Peek element */
@@ -571,12 +588,12 @@ static void test_queuemap(unsigned int task, void *data)
                       val == vals[i]);
 
        /* Check that there are no elements left */
-       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
               errno == ENOENT);
 
        /* Check that unsupported functions set errno to EINVAL */
-       assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
-       assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+       assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+       assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
 
        close(fd);
 }
@@ -613,7 +630,7 @@ static void test_stackmap(unsigned int task, void *data)
                assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
 
        /* Check that element cannot be pushed due to max_entries limit */
-       assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+       assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
               errno == E2BIG);
 
        /* Peek element */
@@ -629,12 +646,12 @@ static void test_stackmap(unsigned int task, void *data)
                       val == vals[i]);
 
        /* Check that there are no elements left */
-       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
               errno == ENOENT);
 
        /* Check that unsupported functions set errno to EINVAL */
-       assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
-       assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+       assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+       assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
 
        close(fd);
 }
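
For BPF_MAP_TYPE_QUEUE and BPF_MAP_TYPE_STACK the key argument is always NULL: bpf_map_update_elem() pushes, bpf_map_lookup_elem() peeks, and bpf_map_lookup_and_delete_elem() pops, which is what the two hunks above assert. A minimal sketch (push_pop() is a hypothetical helper):

#include <errno.h>
#include <linux/types.h>
#include <bpf/bpf.h>

/* Hypothetical helper: push one value, then pop it back. */
static int push_pop(int fd, __u32 in, __u32 *out)
{
	if (bpf_map_update_elem(fd, NULL, &in, 0) < 0)		/* push */
		return -errno;
	if (bpf_map_lookup_and_delete_elem(fd, NULL, out) < 0)	/* pop */
		return -errno;
	return 0;
}
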
@@ -835,7 +852,7 @@ static void test_sockmap(unsigned int tasks, void *data)
        }
 
        bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
-       if (IS_ERR(bpf_map_rx)) {
+       if (!bpf_map_rx) {
                printf("Failed to load map rx from verdict prog\n");
                goto out_sockmap;
        }
@@ -847,7 +864,7 @@ static void test_sockmap(unsigned int tasks, void *data)
        }
 
        bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
-       if (IS_ERR(bpf_map_tx)) {
+       if (!bpf_map_tx) {
                printf("Failed to load map tx from verdict prog\n");
                goto out_sockmap;
        }
@@ -859,7 +876,7 @@ static void test_sockmap(unsigned int tasks, void *data)
        }
 
        bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
-       if (IS_ERR(bpf_map_msg)) {
+       if (!bpf_map_msg) {
                printf("Failed to load map msg from msg_verdict prog\n");
                goto out_sockmap;
        }
@@ -871,7 +888,7 @@ static void test_sockmap(unsigned int tasks, void *data)
        }
 
        bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
-       if (IS_ERR(bpf_map_break)) {
+       if (!bpf_map_break) {
                printf("Failed to load map tx from verdict prog\n");
                goto out_sockmap;
        }
@@ -1153,7 +1170,7 @@ static void test_map_in_map(void)
        }
 
        map = bpf_object__find_map_by_name(obj, "mim_array");
-       if (IS_ERR(map)) {
+       if (!map) {
                printf("Failed to load array of maps from test prog\n");
                goto out_map_in_map;
        }
@@ -1164,7 +1181,7 @@ static void test_map_in_map(void)
        }
 
        map = bpf_object__find_map_by_name(obj, "mim_hash");
-       if (IS_ERR(map)) {
+       if (!map) {
                printf("Failed to load hash of maps from test prog\n");
                goto out_map_in_map;
        }
@@ -1177,7 +1194,7 @@ static void test_map_in_map(void)
        bpf_object__load(obj);
 
        map = bpf_object__find_map_by_name(obj, "mim_array");
-       if (IS_ERR(map)) {
+       if (!map) {
                printf("Failed to load array of maps from test prog\n");
                goto out_map_in_map;
        }
@@ -1194,7 +1211,7 @@ static void test_map_in_map(void)
        }
 
        map = bpf_object__find_map_by_name(obj, "mim_hash");
-       if (IS_ERR(map)) {
+       if (!map) {
                printf("Failed to load hash of maps from test prog\n");
                goto out_map_in_map;
        }
@@ -1246,7 +1263,7 @@ static void test_map_large(void)
        }
 
        key.c = -1;
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == E2BIG);
 
        /* Iterate through all elements. */
@@ -1254,12 +1271,12 @@ static void test_map_large(void)
        key.c = -1;
        for (i = 0; i < MAP_SIZE; i++)
                assert(bpf_map_get_next_key(fd, &key, &key) == 0);
-       assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
 
        key.c = 0;
        assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
        key.a = 1;
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
 
        close(fd);
 }
@@ -1391,7 +1408,7 @@ static void test_map_parallel(void)
        run_parallel(TASKS, test_update_delete, data);
 
        /* Check that key=0 is already there. */
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
               errno == EEXIST);
 
        /* Check that all elements were inserted. */
@@ -1399,7 +1416,7 @@ static void test_map_parallel(void)
        key = -1;
        for (i = 0; i < MAP_SIZE; i++)
                assert(bpf_map_get_next_key(fd, &key, &key) == 0);
-       assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
 
        /* Another check for all elements */
        for (i = 0; i < MAP_SIZE; i++) {
@@ -1415,8 +1432,8 @@ static void test_map_parallel(void)
 
        /* Nothing should be left. */
        key = -1;
-       assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
-       assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
 }
 
 static void test_map_rdonly(void)
@@ -1434,12 +1451,12 @@ static void test_map_rdonly(void)
        key = 1;
        value = 1234;
        /* Try to insert key=1 element. */
-       assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+       assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 &&
               errno == EPERM);
 
        /* Check that key=1 is not found. */
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
-       assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
+       assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT);
 
        close(fd);
 }
@@ -1462,8 +1479,8 @@ static void test_map_wronly_hash(void)
        assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
        /* Check that reading elements and keys from the map is not allowed. */
-       assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
-       assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+       assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM);
+       assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM);
 
        close(fd);
 }
@@ -1490,10 +1507,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
        assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
 
        /* Peek element should fail */
-       assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+       assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM);
 
        /* Pop element should fail */
-       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+       assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 &&
               errno == EPERM);
 
        close(fd);
@@ -1547,7 +1564,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
                        value = &fd32;
                }
                err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
-               CHECK(err != -1 || errno != EINVAL,
+               CHECK(err >= 0 || errno != EINVAL,
                      "reuseport array update unbound sk",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1576,7 +1593,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
                         */
                        err = bpf_map_update_elem(map_fd, &index0, value,
                                                  BPF_ANY);
-                       CHECK(err != -1 || errno != EINVAL,
+                       CHECK(err >= 0 || errno != EINVAL,
                              "reuseport array update non-listening sk",
                              "sock_type:%d err:%d errno:%d\n",
                              type, err, errno);
@@ -1606,31 +1623,31 @@ static void test_reuseport_array(void)
 
        map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
                                sizeof(__u32), sizeof(__u64), array_size, 0);
-       CHECK(map_fd == -1, "reuseport array create",
+       CHECK(map_fd < 0, "reuseport array create",
              "map_fd:%d, errno:%d\n", map_fd, errno);
 
        /* Test lookup/update/delete with invalid index */
        err = bpf_map_delete_elem(map_fd, &bad_index);
-       CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+       CHECK(err >= 0 || errno != E2BIG, "reuseport array del >=max_entries",
              "err:%d errno:%d\n", err, errno);
 
        err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
-       CHECK(err != -1 || errno != E2BIG,
+       CHECK(err >= 0 || errno != E2BIG,
              "reuseport array update >=max_entries",
              "err:%d errno:%d\n", err, errno);
 
        err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
-       CHECK(err != -1 || errno != ENOENT,
+       CHECK(err >= 0 || errno != ENOENT,
              "reuseport array update >=max_entries",
              "err:%d errno:%d\n", err, errno);
 
        /* Test lookup/delete non existence elem */
        err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-       CHECK(err != -1 || errno != ENOENT,
+       CHECK(err >= 0 || errno != ENOENT,
              "reuseport array lookup not-exist elem",
              "err:%d errno:%d\n", err, errno);
        err = bpf_map_delete_elem(map_fd, &index3);
-       CHECK(err != -1 || errno != ENOENT,
+       CHECK(err >= 0 || errno != ENOENT,
              "reuseport array del not-exist elem",
              "err:%d errno:%d\n", err, errno);
 
@@ -1644,7 +1661,7 @@ static void test_reuseport_array(void)
                /* BPF_EXIST failure case */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_EXIST);
-               CHECK(err != -1 || errno != ENOENT,
+               CHECK(err >= 0 || errno != ENOENT,
                      "reuseport array update empty elem BPF_EXIST",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1653,7 +1670,7 @@ static void test_reuseport_array(void)
                /* BPF_NOEXIST success case */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_NOEXIST);
-               CHECK(err == -1,
+               CHECK(err < 0,
                      "reuseport array update empty elem BPF_NOEXIST",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1662,7 +1679,7 @@ static void test_reuseport_array(void)
                /* BPF_EXIST success case. */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_EXIST);
-               CHECK(err == -1,
+               CHECK(err < 0,
                      "reuseport array update same elem BPF_EXIST",
                      "sock_type:%d err:%d errno:%d\n", type, err, errno);
                fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
@@ -1670,7 +1687,7 @@ static void test_reuseport_array(void)
                /* BPF_NOEXIST failure case */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_NOEXIST);
-               CHECK(err != -1 || errno != EEXIST,
+               CHECK(err >= 0 || errno != EEXIST,
                      "reuseport array update non-empty elem BPF_NOEXIST",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1679,7 +1696,7 @@ static void test_reuseport_array(void)
                /* BPF_ANY case (always succeed) */
                err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
                                          BPF_ANY);
-               CHECK(err == -1,
+               CHECK(err < 0,
                      "reuseport array update same sk with BPF_ANY",
                      "sock_type:%d err:%d errno:%d\n", type, err, errno);
 
@@ -1688,32 +1705,32 @@ static void test_reuseport_array(void)
 
                /* The same sk cannot be added to reuseport_array twice */
                err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
-               CHECK(err != -1 || errno != EBUSY,
+               CHECK(err >= 0 || errno != EBUSY,
                      "reuseport array update same sk with same index",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
 
                err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
-               CHECK(err != -1 || errno != EBUSY,
+               CHECK(err >= 0 || errno != EBUSY,
                      "reuseport array update same sk with different index",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
 
                /* Test delete elem */
                err = bpf_map_delete_elem(map_fd, &index3);
-               CHECK(err == -1, "reuseport array delete sk",
+               CHECK(err < 0, "reuseport array delete sk",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
 
                /* Add it back with BPF_NOEXIST */
                err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
-               CHECK(err == -1,
+               CHECK(err < 0,
                      "reuseport array re-add with BPF_NOEXIST after del",
                      "sock_type:%d err:%d errno:%d\n", type, err, errno);
 
                /* Test cookie */
                err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-               CHECK(err == -1 || sk_cookie != map_cookie,
+               CHECK(err < 0 || sk_cookie != map_cookie,
                      "reuseport array lookup re-added sk",
                      "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
                      type, err, errno, sk_cookie, map_cookie);
@@ -1722,7 +1739,7 @@ static void test_reuseport_array(void)
                for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
                        close(grpa_fds64[f]);
                err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-               CHECK(err != -1 || errno != ENOENT,
+               CHECK(err >= 0 || errno != ENOENT,
                      "reuseport array lookup after close()",
                      "sock_type:%d err:%d errno:%d\n",
                      type, err, errno);
@@ -1733,7 +1750,7 @@ static void test_reuseport_array(void)
        CHECK(fd64 == -1, "socket(SOCK_RAW)", "fd:%d errno:%d\n",
              fd64, errno);
        err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
-       CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+       CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
              "err:%d errno:%d\n", err, errno);
        close(fd64);
 
@@ -1743,16 +1760,16 @@ static void test_reuseport_array(void)
        /* Test 32 bit fd */
        map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
                                sizeof(__u32), sizeof(__u32), array_size, 0);
-       CHECK(map_fd == -1, "reuseport array create",
+       CHECK(map_fd < 0, "reuseport array create",
              "map_fd:%d, errno:%d\n", map_fd, errno);
        prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
                              &sk_cookie, 1);
        fd = fd64;
        err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
-       CHECK(err == -1, "reuseport array update 32 bit fd",
+       CHECK(err < 0, "reuseport array update 32 bit fd",
              "err:%d errno:%d\n", err, errno);
        err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
-       CHECK(err != -1 || errno != ENOSPC,
+       CHECK(err >= 0 || errno != ENOSPC,
              "reuseport array lookup 32 bit fd",
              "err:%d errno:%d\n", err, errno);
        close(fd);
@@ -1798,6 +1815,8 @@ int main(void)
 {
        srand(time(NULL));
 
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        map_flags = 0;
        run_all_tests();
 
index 6396932..6f10310 100644 (file)
@@ -737,6 +737,9 @@ int main(int argc, char **argv)
        if (err)
                return err;
 
+       /* Use libbpf 1.0 API mode */
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        libbpf_set_print(libbpf_print_fn);
 
        srand(time(NULL));
index dda52cb..8ef7f33 100644 (file)
@@ -249,16 +249,17 @@ extern int test__join_cgroup(const char *path);
 #define ASSERT_OK_PTR(ptr, name) ({                                    \
        static int duration = 0;                                        \
        const void *___res = (ptr);                                     \
-       bool ___ok = !IS_ERR_OR_NULL(___res);                           \
-       CHECK(!___ok, (name),                                           \
-             "unexpected error: %ld\n", PTR_ERR(___res));              \
+       int ___err = libbpf_get_error(___res);                          \
+       bool ___ok = ___err == 0;                                       \
+       CHECK(!___ok, (name), "unexpected error: %d\n", ___err);        \
        ___ok;                                                          \
 })
 
 #define ASSERT_ERR_PTR(ptr, name) ({                                   \
        static int duration = 0;                                        \
        const void *___res = (ptr);                                     \
-       bool ___ok = IS_ERR(___res);                                    \
+       int ___err = libbpf_get_error(___res);                          \
+       bool ___ok = ___err != 0;                                       \
        CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res);      \
        ___ok;                                                          \
 })
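
The ASSERT_*_PTR rework tracks the matching v1.0 convention for
pointer-returning APIs: errors are no longer encoded into the pointer for
IS_ERR() to decode, so callers extract them with libbpf_get_error(). A
hedged sketch of the pattern (the object path is hypothetical):

    #include <bpf/libbpf.h>
    #include <stdio.h>

    static int open_obj(void)
    {
            struct bpf_object *obj;
            long err;

            obj = bpf_object__open("prog.o");       /* hypothetical file */
            err = libbpf_get_error(obj);            /* 0 on success, -Exxx */
            if (err) {
                    fprintf(stderr, "open failed: %ld\n", err);
                    return (int)err;
            }
            bpf_object__close(obj);
            return 0;
    }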
index 73da7fe..4a39304 100644 (file)
@@ -82,6 +82,8 @@ int main(int argc, char **argv)
        cpu_set_t cpuset;
        __u32 key = 0;
 
+       libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
        CPU_ZERO(&cpuset);
        CPU_SET(0, &cpuset);
        pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
@@ -116,7 +118,7 @@ int main(int argc, char **argv)
 
        pb_opts.sample_cb = dummyfn;
        pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
-       if (IS_ERR(pb))
+       if (!pb)
                goto err;
 
        pthread_create(&tid, NULL, poller_thread, pb);
@@ -163,7 +165,6 @@ err:
        bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
        close(cg_fd);
        cleanup_cgroup_environment();
-       if (!IS_ERR_OR_NULL(pb))
-               perf_buffer__free(pb);
+       perf_buffer__free(pb);
        return error;
 }
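
This hunk shows the same convention from the caller's side: in strict mode
perf_buffer__new() returns NULL with the error in errno rather than an
ERR_PTR, and perf_buffer__free() tolerates NULL, which is why the
IS_ERR_OR_NULL() guard can simply go away. A minimal sketch under those
assumptions (callback name and poll timeout are placeholders):

    #include <bpf/libbpf.h>
    #include <errno.h>
    #include <stdio.h>

    static void on_sample(void *ctx, int cpu, void *data, __u32 size)
    {
            /* hypothetical per-sample callback */
    }

    static int poll_once(int map_fd)
    {
            struct perf_buffer_opts pb_opts = {};
            struct perf_buffer *pb;
            int err;

            pb_opts.sample_cb = on_sample;
            pb = perf_buffer__new(map_fd, 8 /* pages per CPU */, &pb_opts);
            if (!pb) {
                    err = -errno;   /* NULL on error, errno carries the code */
                    fprintf(stderr, "perf_buffer__new: %d\n", err);
                    return err;
            }
            err = perf_buffer__poll(pb, 100 /* ms */);
            perf_buffer__free(pb);  /* also safe on NULL, no guard needed */
            return err;
    }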
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
new file mode 100755 (executable)
index 0000000..1538373
--- /dev/null
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test topology:
+#     - - - - - - - - - - - - - - - - - - - - - - - - -
+#    | veth1         veth2         veth3 |  ... init net
+#     - -| - - - - - - | - - - - - - | - -
+#    ---------     ---------     ---------
+#    | veth0 |     | veth0 |     | veth0 |  ...
+#    ---------     ---------     ---------
+#       ns1           ns2           ns3
+#
+# Test modules:
+# XDP modes: generic, native, native + egress_prog
+#
+# Test cases:
+#   ARP: Testing BPF_F_BROADCAST, the ingress interface should also receive
+#   the redirects.
+#      ns1 -> gw: ns1, ns2 and ns3 should receive the ARP request
+#   IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
+#   interface should not receive the redirects.
+#      ns1 -> gw: ns1 should not receive the redirects; ns2 and ns3 should.
+#   IPv6: Testing no flags, all the packets should be redirected back
+#      ping test: ns1 -> ns2 (block), echo requests will be redirected back
+#   egress_prog:
+#      all src MACs should be the egress interface's MAC
+
+# netns numbers
+NUM=3
+IFACES=""
+DRV_MODE="xdpgeneric xdpdrv xdpegress"
+PASS=0
+FAIL=0
+
+test_pass()
+{
+       echo "Pass: $@"
+       PASS=$((PASS + 1))
+}
+
+test_fail()
+{
+       echo "fail: $@"
+       FAIL=$((FAIL + 1))
+}
+
+clean_up()
+{
+       for i in $(seq $NUM); do
+               ip link del veth$i 2> /dev/null
+               ip netns del ns$i 2> /dev/null
+       done
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_env()
+{
+       ip link set dev lo xdpgeneric off &>/dev/null
+       if [ $? -ne 0 ]; then
+               echo "selftests: [SKIP] Could not run test without ip xdpgeneric support"
+               exit 4
+       fi
+
+       which tcpdump &>/dev/null
+       if [ $? -ne 0 ]; then
+               echo "selftests: [SKIP] Could not run test without tcpdump"
+               exit 4
+       fi
+}
+
+setup_ns()
+{
+       local mode=$1
+       IFACES=""
+
+       if [ "$mode" = "xdpegress" ]; then
+               mode="xdpdrv"
+       fi
+
+       for i in $(seq $NUM); do
+               ip netns add ns$i
+               ip link add veth$i type veth peer name veth0 netns ns$i
+               ip link set veth$i up
+               ip -n ns$i link set veth0 up
+
+               ip -n ns$i addr add 192.0.2.$i/24 dev veth0
+               ip -n ns$i addr add 2001:db8::$i/64 dev veth0
+               # Add a neigh entry for IPv4 ping test
+               ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+               ip -n ns$i link set veth0 $mode obj \
+                       xdp_dummy.o sec xdp_dummy &> /dev/null || \
+                       { test_fail "Unable to load dummy xdp" && exit 1; }
+               IFACES="$IFACES veth$i"
+               veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}')
+       done
+}
+
+do_egress_tests()
+{
+       local mode=$1
+
+       # mac test
+       ip netns exec ns2 tcpdump -e -i veth0 -nn -l &> mac_ns1-2_${mode}.log &
+       ip netns exec ns3 tcpdump -e -i veth0 -nn -l &> mac_ns1-3_${mode}.log &
+       sleep 0.5
+       ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+       sleep 0.5
+       pkill -9 tcpdump
+
+       # mac check
+       grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" mac_ns1-2_${mode}.log && \
+              test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
+       grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" mac_ns1-3_${mode}.log && \
+               test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
+}
+
+do_ping_tests()
+{
+       local mode=$1
+
+       # ping6 test: echo request should be redirect back to itself, not others
+       ip netns exec ns1 ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+
+       ip netns exec ns1 tcpdump -i veth0 -nn -l -e &> ns1-1_${mode}.log &
+       ip netns exec ns2 tcpdump -i veth0 -nn -l -e &> ns1-2_${mode}.log &
+       ip netns exec ns3 tcpdump -i veth0 -nn -l -e &> ns1-3_${mode}.log &
+       sleep 0.5
+       # ARP test
+       ip netns exec ns1 ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+       # IPv4 test
+       ip netns exec ns1 ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+       # IPv6 test
+       ip netns exec ns1 ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+       sleep 0.5
+       pkill -9 tcpdump
+
+       # All netns should receive the redirected ARP requests
+       [ $(grep -c "who-has 192.0.2.254" ns1-1_${mode}.log) -gt 4 ] && \
+               test_pass "$mode arp(F_BROADCAST) ns1-1" || \
+               test_fail "$mode arp(F_BROADCAST) ns1-1"
+       [ $(grep -c "who-has 192.0.2.254" ns1-2_${mode}.log) -le 4 ] && \
+               test_pass "$mode arp(F_BROADCAST) ns1-2" || \
+               test_fail "$mode arp(F_BROADCAST) ns1-2"
+       [ $(grep -c "who-has 192.0.2.254" ns1-3_${mode}.log) -le 4 ] && \
+               test_pass "$mode arp(F_BROADCAST) ns1-3" || \
+               test_fail "$mode arp(F_BROADCAST) ns1-3"
+
+       # ns1 should not receive the redirected echo requests, the others should
+       [ $(grep -c "ICMP echo request" ns1-1_${mode}.log) -eq 4 ] && \
+               test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
+               test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
+       [ $(grep -c "ICMP echo request" ns1-2_${mode}.log) -eq 4 ] && \
+               test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
+               test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
+       [ $(grep -c "ICMP echo request" ns1-3_${mode}.log) -eq 4 ] && \
+               test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
+               test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
+
+       # ns1 should receive the echo request, ns2 should not
+       [ $(grep -c "ICMP6, echo request" ns1-1_${mode}.log) -eq 4 ] && \
+               test_pass "$mode IPv6 (no flags) ns1-1" || \
+               test_fail "$mode IPv6 (no flags) ns1-1"
+       [ $(grep -c "ICMP6, echo request" ns1-2_${mode}.log) -eq 0 ] && \
+               test_pass "$mode IPv6 (no flags) ns1-2" || \
+               test_fail "$mode IPv6 (no flags) ns1-2"
+}
+
+do_tests()
+{
+       local mode=$1
+       local drv_p
+
+       case ${mode} in
+               xdpdrv)  drv_p="-N";;
+               xdpegress) drv_p="-X";;
+               xdpgeneric) drv_p="-S";;
+       esac
+
+       ./xdp_redirect_multi $drv_p $IFACES &> xdp_redirect_${mode}.log &
+       xdp_pid=$!
+       sleep 1
+
+       if [ "$mode" = "xdpegress" ]; then
+               do_egress_tests $mode
+       else
+               do_ping_tests $mode
+       fi
+
+       kill $xdp_pid
+}
+
+trap clean_up 0 2 3 6 9
+
+check_env
+rm -f xdp_redirect_*.log ns*.log mac_ns*.log
+
+for mode in ${DRV_MODE}; do
+       setup_ns $mode
+       do_tests $mode
+       clean_up
+done
+
+echo "Summary: PASS $PASS, FAIL $FAIL"
+[ $FAIL -eq 0 ] && exit 0 || exit 1
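
The BPF side this script exercises reduces to one bpf_redirect_map() call
with the new flags from this series: BPF_F_BROADCAST clones the frame to
every entry of the device map, and BPF_F_EXCLUDE_INGRESS suppresses the
copy for the interface the packet arrived on. A hedged sketch of such an
ingress program, not the exact selftest source (map sizing and section
name are assumptions):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
            __uint(key_size, sizeof(int));
            __uint(value_size, sizeof(struct bpf_devmap_val));
            __uint(max_entries, 32);
    } map_all SEC(".maps");

    SEC("xdp")
    int xdp_redirect_map_all_prog(struct xdp_md *ctx)
    {
            /* Key is ignored with BPF_F_BROADCAST: redirect to all devmap
             * entries except the ingress interface. */
            return bpf_redirect_map(&map_all, 0,
                                    BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
    }

    char _license[] SEC("license") = "GPL";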
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
new file mode 100644 (file)
index 0000000..3696a8f
--- /dev/null
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/resource.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+#define MAX_INDEX_NUM 1024
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+
+static void int_exit(int sig)
+{
+       __u32 prog_id = 0;
+       int i;
+
+       for (i = 0; ifaces[i] > 0; i++) {
+               if (bpf_get_link_xdp_id(ifaces[i], &prog_id, xdp_flags)) {
+                       printf("bpf_get_link_xdp_id failed\n");
+                       exit(1);
+               }
+               if (prog_id)
+                       bpf_set_link_xdp_fd(ifaces[i], -1, xdp_flags);
+       }
+
+       exit(0);
+}
+
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+       char ifname[IF_NAMESIZE];
+       struct ifreq ifr;
+       int fd, ret = -1;
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0)
+               return ret;
+
+       if (!if_indextoname(ifindex, ifname))
+               goto err_out;
+
+       strcpy(ifr.ifr_name, ifname);
+
+       if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
+               goto err_out;
+
+       memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+       ret = 0;
+
+err_out:
+       close(fd);
+       return ret;
+}
+
+static void usage(const char *prog)
+{
+       fprintf(stderr,
+               "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
+               "OPTS:\n"
+               "    -S    use skb-mode\n"
+               "    -N    enforce native mode\n"
+               "    -F    force loading prog\n"
+               "    -X    load xdp program on egress\n",
+               prog);
+}
+
+int main(int argc, char **argv)
+{
+       int prog_fd, group_all, mac_map;
+       struct bpf_program *ingress_prog, *egress_prog;
+       struct bpf_prog_load_attr prog_load_attr = {
+               .prog_type = BPF_PROG_TYPE_UNSPEC,
+       };
+       int i, ret, opt, egress_prog_fd = 0;
+       struct bpf_devmap_val devmap_val;
+       bool attach_egress_prog = false;
+       unsigned char mac_addr[6];
+       char ifname[IF_NAMESIZE];
+       struct bpf_object *obj;
+       unsigned int ifindex;
+       char filename[256];
+
+       while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+               switch (opt) {
+               case 'S':
+                       xdp_flags |= XDP_FLAGS_SKB_MODE;
+                       break;
+               case 'N':
+                       /* default, set below */
+                       break;
+               case 'F':
+                       xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+                       break;
+               case 'X':
+                       attach_egress_prog = true;
+                       break;
+               default:
+                       usage(basename(argv[0]));
+                       return 1;
+               }
+       }
+
+       if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+               xdp_flags |= XDP_FLAGS_DRV_MODE;
+       } else if (attach_egress_prog) {
+               printf("Load xdp program on egress with SKB mode not supported yet\n");
+               goto err_out;
+       }
+
+       if (optind == argc) {
+               printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+               goto err_out;
+       }
+
+       printf("Get interfaces");
+       for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+               ifaces[i] = if_nametoindex(argv[optind + i]);
+               if (!ifaces[i])
+                       ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+               if (!if_indextoname(ifaces[i], ifname)) {
+                       perror("Invalid interface name or i");
+                       goto err_out;
+               }
+               if (ifaces[i] > MAX_INDEX_NUM) {
+                       printf("Interface index to large\n");
+                       goto err_out;
+               }
+               printf(" %d", ifaces[i]);
+       }
+       printf("\n");
+
+       snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+       prog_load_attr.file = filename;
+
+       if (bpf_prog_load_xattr(&prog_load_attr, &obj, &prog_fd))
+               goto err_out;
+
+       if (attach_egress_prog)
+               group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
+       else
+               group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
+       mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
+
+       if (group_all < 0 || mac_map < 0) {
+               printf("bpf_object__find_map_fd_by_name failed\n");
+               goto err_out;
+       }
+
+       if (attach_egress_prog) {
+               /* Find ingress/egress prog for 2nd xdp prog */
+               ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
+               egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+               if (!ingress_prog || !egress_prog) {
+                       printf("finding ingress/egress_prog in obj file failed\n");
+                       goto err_out;
+               }
+               prog_fd = bpf_program__fd(ingress_prog);
+               egress_prog_fd = bpf_program__fd(egress_prog);
+               if (prog_fd < 0 || egress_prog_fd < 0) {
+                       printf("find egress_prog fd failed\n");
+                       goto err_out;
+               }
+       }
+
+       signal(SIGINT, int_exit);
+       signal(SIGTERM, int_exit);
+
+       /* Init forward multicast groups and exclude group */
+       for (i = 0; ifaces[i] > 0; i++) {
+               ifindex = ifaces[i];
+
+               if (attach_egress_prog) {
+                       ret = get_mac_addr(ifindex, mac_addr);
+                       if (ret < 0) {
+                               printf("get interface %d mac failed\n", ifindex);
+                               goto err_out;
+                       }
+                       ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
+                       if (ret) {
+                               perror("bpf_update_elem mac_map failed\n");
+                               goto err_out;
+                       }
+               }
+
+               /* Add all the interfaces to group all */
+               devmap_val.ifindex = ifindex;
+               devmap_val.bpf_prog.fd = egress_prog_fd;
+               ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
+               if (ret) {
+                       perror("bpf_map_update_elem");
+                       goto err_out;
+               }
+
+               /* bind prog_fd to each interface */
+               ret = bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags);
+               if (ret) {
+                       printf("Set xdp fd failed on %d\n", ifindex);
+                       goto err_out;
+               }
+       }
+
+       /* sleep some time for testing */
+       sleep(999);
+
+       return 0;
+
+err_out:
+       return 1;
+}
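
For the -X/egress path, the loader stores egress_prog_fd in each
bpf_devmap_val entry, and the shell script's mac check expects that devmap
program to stamp the egress interface's address into the source MAC via
mac_map. A hedged sketch of that egress side (section name, map layout and
bounds handling are assumptions, not the exact selftest source):

    // SPDX-License-Identifier: GPL-2.0
    #include <linux/bpf.h>
    #include <linux/if_ether.h>
    #include <bpf/bpf_helpers.h>

    struct {
            __uint(type, BPF_MAP_TYPE_HASH);
            __uint(key_size, sizeof(__u32));
            __uint(value_size, ETH_ALEN);
            __uint(max_entries, 32);
    } mac_map SEC(".maps");

    SEC("xdp_devmap")
    int xdp_devmap_prog(struct xdp_md *ctx)
    {
            void *data_end = (void *)(long)ctx->data_end;
            void *data = (void *)(long)ctx->data;
            __u32 key = ctx->egress_ifindex;
            struct ethhdr *eth = data;
            unsigned char *mac;

            if (data + sizeof(*eth) > data_end)
                    return XDP_DROP;

            /* Rewrite the source MAC to the egress interface's address. */
            mac = bpf_map_lookup_elem(&mac_map, &key);
            if (mac)
                    __builtin_memcpy(eth->h_source, mac, ETH_ALEN);

            return XDP_PASS;
    }

    char _license[] SEC("license") = "GPL";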