Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Author:     David S. Miller <davem@davemloft.net>
AuthorDate: Tue, 14 Jul 2020 01:04:05 +0000 (18:04 -0700)
Commit:     David S. Miller <davem@davemloft.net>
CommitDate: Tue, 14 Jul 2020 01:04:05 +0000 (18:04 -0700)
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-07-13

The following pull-request contains BPF updates for your *net-next* tree.

We've added 36 non-merge commits during the last 7 day(s) which contain
a total of 62 files changed, 2242 insertions(+), 468 deletions(-).

The main changes are:

1) Avoid trace_printk warning banner by switching bpf_trace_printk to use
   its own tracing event, from Alan.

2) Better libbpf support on older kernels, from Andrii.

3) Additional AF_XDP stats, from Ciara.

5) Build-time resolution of BTF IDs, from Jiri.

5) BPF_CGROUP_INET_SOCK_RELEASE hook, from Stanislav (a minimal usage
   sketch follows the summary).
====================
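
A minimal sketch of the new hook from 5): a cgroup/sock_release program
is an ordinary BPF_PROG_TYPE_CGROUP_SOCK program with expected attach
type BPF_CGROUP_INET_SOCK_RELEASE. The libbpf section name comes from
the libbpf.c hunk below; the program body itself is illustrative and
not part of this series:

  // SPDX-License-Identifier: GPL-2.0
  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  /* Runs when an inet socket in the attached cgroup is released,
   * mirroring the existing cgroup/sock_create hook at the other
   * end of the socket lifetime. */
  SEC("cgroup/sock_release")
  int sock_release_prog(struct bpf_sock *ctx)
  {
          return 1;       /* conventional "allow" verdict for
                             cgroup sock programs */
  }

  char _license[] SEC("license") = "GPL";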

Signed-off-by: David S. Miller <davem@davemloft.net>
Makefile
include/uapi/linux/bpf.h
kernel/bpf/btf.c
kernel/bpf/syscall.c
net/core/filter.c
net/xdp/xsk_buff_pool.c
tools/include/uapi/linux/bpf.h
tools/lib/bpf/libbpf.c

diff --combined Makefile
+++ b/Makefile
@@@ -2,7 -2,7 +2,7 @@@
  VERSION = 5
  PATCHLEVEL = 8
  SUBLEVEL = 0
 -EXTRAVERSION = -rc2
 +EXTRAVERSION = -rc4
  NAME = Kleptomaniac Octopus
  
  # *DOCUMENTATION*
@@@ -448,6 -448,7 +448,7 @@@ OBJSIZE            = $(CROSS_COMPILE)siz
  STRIP         = $(CROSS_COMPILE)strip
  endif
  PAHOLE                = pahole
+ RESOLVE_BTFIDS        = $(objtree)/tools/bpf/resolve_btfids/resolve_btfids
  LEX           = flex
  YACC          = bison
  AWK           = awk
@@@ -510,7 -511,7 +511,7 @@@ GCC_PLUGINS_CFLAGS :
  CLANG_FLAGS :=
  
  export ARCH SRCARCH CONFIG_SHELL BASH HOSTCC KBUILD_HOSTCFLAGS CROSS_COMPILE LD CC
- export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE LEX YACC AWK INSTALLKERNEL
+ export CPP AR NM STRIP OBJCOPY OBJDUMP OBJSIZE READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
  export PERL PYTHON PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
  export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ
  export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
@@@ -970,8 -971,8 +971,8 @@@ LDFLAGS_vmlinux    += --pack-dyn-relocs=re
  endif
  
  # Align the bit size of userspace programs with the kernel
 -KBUILD_USERCFLAGS  += $(filter -m32 -m64, $(KBUILD_CFLAGS))
 -KBUILD_USERLDFLAGS += $(filter -m32 -m64, $(KBUILD_CFLAGS))
 +KBUILD_USERCFLAGS  += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
 +KBUILD_USERLDFLAGS += $(filter -m32 -m64 --target=%, $(KBUILD_CFLAGS))
  
  # make the checker run with the right architecture
  CHECKFLAGS += --arch=$(ARCH)
@@@ -1053,9 -1054,10 +1054,10 @@@ export mod_sign_cm
  
  HOST_LIBELF_LIBS = $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf)
  
+ has_libelf = $(call try-run,\
+                echo "int main() {}" | $(HOSTCC) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0)
  ifdef CONFIG_STACK_VALIDATION
-   has_libelf := $(call try-run,\
-               echo "int main() {}" | $(HOSTCC) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0)
    ifeq ($(has_libelf),1)
      objtool_target := tools/objtool FORCE
    else
    endif
  endif
  
+ ifdef CONFIG_DEBUG_INFO_BTF
+   ifeq ($(has_libelf),1)
+     resolve_btfids_target := tools/bpf/resolve_btfids FORCE
+   else
+     ERROR_RESOLVE_BTFIDS := 1
+   endif
+ endif
  PHONY += prepare0
  
  export MODORDER := $(extmod-prefix)modules.order
@@@ -1175,7 -1185,7 +1185,7 @@@ prepare0: archprepar
        $(Q)$(MAKE) $(build)=.
  
  # All the preparing..
- prepare: prepare0 prepare-objtool
+ prepare: prepare0 prepare-objtool prepare-resolve_btfids
  
  # Support for using generic headers in asm-generic
  asm-generic := -f $(srctree)/scripts/Makefile.asm-generic obj
@@@ -1188,7 -1198,7 +1198,7 @@@ uapi-asm-generic
        $(Q)$(MAKE) $(asm-generic)=arch/$(SRCARCH)/include/generated/uapi/asm \
        generic=include/uapi/asm-generic
  
- PHONY += prepare-objtool
+ PHONY += prepare-objtool prepare-resolve_btfids
  prepare-objtool: $(objtool_target)
  ifeq ($(SKIP_STACK_VALIDATION),1)
  ifdef CONFIG_UNWINDER_ORC
  endif
  endif
  
+ prepare-resolve_btfids: $(resolve_btfids_target)
+ ifeq ($(ERROR_RESOLVE_BTFIDS),1)
+       @echo "error: Cannot resolve BTF IDs for CONFIG_DEBUG_INFO_BTF, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
+       @false
+ endif
  # Generate some files
  # ---------------------------------------------------------------------------
  
diff --combined include/uapi/linux/bpf.h
@@@ -226,6 -226,7 +226,7 @@@ enum bpf_attach_type 
        BPF_CGROUP_INET4_GETSOCKNAME,
        BPF_CGROUP_INET6_GETSOCKNAME,
        BPF_XDP_DEVMAP,
+       BPF_CGROUP_INET_SOCK_RELEASE,
        __MAX_BPF_ATTACH_TYPE
  };
  
@@@ -3174,12 -3175,13 +3175,12 @@@ union bpf_attr 
   * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
   *    Description
   *            Copy *size* bytes from *data* into a ring buffer *ringbuf*.
 - *            If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
 - *            new data availability is sent.
 - *            IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
 - *            new data availability is sent unconditionally.
 + *            If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
 + *            of new data availability is sent.
 + *            If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
 + *            of new data availability is sent unconditionally.
   *    Return
 - *            0, on success;
 - *            < 0, on error.
 + *            0 on success, or a negative error in case of failure.
   *
   * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
   *    Description
   * void bpf_ringbuf_submit(void *data, u64 flags)
   *    Description
   *            Submit reserved ring buffer sample, pointed to by *data*.
 - *            If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
 - *            new data availability is sent.
 - *            IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
 - *            new data availability is sent unconditionally.
 + *            If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
 + *            of new data availability is sent.
 + *            If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
 + *            of new data availability is sent unconditionally.
   *    Return
   *            Nothing. Always succeeds.
   *
   * void bpf_ringbuf_discard(void *data, u64 flags)
   *    Description
   *            Discard reserved ring buffer sample, pointed to by *data*.
 - *            If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
 - *            new data availability is sent.
 - *            IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
 - *            new data availability is sent unconditionally.
 + *            If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
 + *            of new data availability is sent.
 + *            If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
 + *            of new data availability is sent unconditionally.
   *    Return
   *            Nothing. Always succeeds.
   *
   *    Description
   *            Query various characteristics of provided ring buffer. What
   *            exactly is queries is determined by *flags*:
 - *              - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
 - *              - BPF_RB_RING_SIZE - the size of ring buffer;
 - *              - BPF_RB_CONS_POS - consumer position (can wrap around);
 - *              - BPF_RB_PROD_POS - producer(s) position (can wrap around);
 - *            Data returned is just a momentary snapshots of actual values
 + *
 + *            * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed.
 + *            * **BPF_RB_RING_SIZE**: The size of ring buffer.
 + *            * **BPF_RB_CONS_POS**: Consumer position (can wrap around).
 + *            * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around).
 + *
 + *            Data returned is just a momentary snapshot of actual values
   *            and could be inaccurate, so this facility should be used to
   *            power heuristics and for reporting, not to make 100% correct
   *            calculation.
   *    Return
 - *            Requested value, or 0, if flags are not recognized.
 + *            Requested value, or 0, if *flags* are not recognized.
   *
   * long bpf_csum_level(struct sk_buff *skb, u64 level)
   *    Description
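
The wakeup flags documented above are passed in the helper's *flags*
argument; a condensed, illustrative use from the BPF program side (map
name and value are hypothetical):

  struct {
          __uint(type, BPF_MAP_TYPE_RINGBUF);
          __uint(max_entries, 1 << 12);
  } rb SEC(".maps");

  /* push one sample, suppressing the consumer wakeup */
  long err = bpf_ringbuf_output(&rb, &val, sizeof(val),
                                BPF_RB_NO_WAKEUP);
  /* err is 0 on success, negative on failure */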
diff --combined kernel/bpf/btf.c
@@@ -18,6 -18,7 +18,7 @@@
  #include <linux/sort.h>
  #include <linux/bpf_verifier.h>
  #include <linux/btf.h>
+ #include <linux/btf_ids.h>
  #include <linux/skmsg.h>
  #include <linux/perf_event.h>
  #include <net/sock.h>
@@@ -3621,12 -3622,15 +3622,15 @@@ static int btf_translate_to_vmlinux(str
        return kern_ctx_type->type;
  }
  
+ BTF_ID_LIST(bpf_ctx_convert_btf_id)
+ BTF_ID(struct, bpf_ctx_convert)
  struct btf *btf_parse_vmlinux(void)
  {
        struct btf_verifier_env *env = NULL;
        struct bpf_verifier_log *log;
        struct btf *btf = NULL;
-       int err, btf_id;
+       int err;
  
        env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
        if (!env)
        if (err)
                goto errout;
  
-       /* find struct bpf_ctx_convert for type checking later */
-       btf_id = btf_find_by_name_kind(btf, "bpf_ctx_convert", BTF_KIND_STRUCT);
-       if (btf_id < 0) {
-               err = btf_id;
-               goto errout;
-       }
        /* btf_parse_vmlinux() runs under bpf_verifier_lock */
-       bpf_ctx_convert.t = btf_type_by_id(btf, btf_id);
+       bpf_ctx_convert.t = btf_type_by_id(btf, bpf_ctx_convert_btf_id[0]);
  
        /* find bpf map structs for map_ptr access checking */
        err = btf_vmlinux_map_ids_init(btf, log);
@@@ -3776,7 -3774,7 +3774,7 @@@ bool btf_ctx_access(int off, int size, 
                                return false;
  
                        t = btf_type_skip_modifiers(btf, t->type, NULL);
 -                      if (!btf_type_is_int(t)) {
 +                      if (!btf_type_is_small_int(t)) {
                                bpf_log(log,
                                        "ret type %s not allowed for fmod_ret\n",
                                        btf_kind_str[BTF_INFO_KIND(t->info)]);
        /* skip modifiers */
        while (btf_type_is_modifier(t))
                t = btf_type_by_id(btf, t->type);
 -      if (btf_type_is_int(t) || btf_type_is_enum(t))
 +      if (btf_type_is_small_int(t) || btf_type_is_enum(t))
                /* accessing a scalar */
                return true;
        if (!btf_type_is_ptr(t)) {
@@@ -4079,96 -4077,17 +4077,17 @@@ error
        return -EINVAL;
  }
  
- static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn,
-                                  int arg)
- {
-       char fnname[KSYM_SYMBOL_LEN + 4] = "btf_";
-       const struct btf_param *args;
-       const struct btf_type *t;
-       const char *tname, *sym;
-       u32 btf_id, i;
-       if (IS_ERR(btf_vmlinux)) {
-               bpf_log(log, "btf_vmlinux is malformed\n");
-               return -EINVAL;
-       }
-       sym = kallsyms_lookup((long)fn, NULL, NULL, NULL, fnname + 4);
-       if (!sym) {
-               bpf_log(log, "kernel doesn't have kallsyms\n");
-               return -EFAULT;
-       }
-       for (i = 1; i <= btf_vmlinux->nr_types; i++) {
-               t = btf_type_by_id(btf_vmlinux, i);
-               if (BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF)
-                       continue;
-               tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
-               if (!strcmp(tname, fnname))
-                       break;
-       }
-       if (i > btf_vmlinux->nr_types) {
-               bpf_log(log, "helper %s type is not found\n", fnname);
-               return -ENOENT;
-       }
-       t = btf_type_by_id(btf_vmlinux, t->type);
-       if (!btf_type_is_ptr(t))
-               return -EFAULT;
-       t = btf_type_by_id(btf_vmlinux, t->type);
-       if (!btf_type_is_func_proto(t))
-               return -EFAULT;
-       args = (const struct btf_param *)(t + 1);
-       if (arg >= btf_type_vlen(t)) {
-               bpf_log(log, "bpf helper %s doesn't have %d-th argument\n",
-                       fnname, arg);
-               return -EINVAL;
-       }
-       t = btf_type_by_id(btf_vmlinux, args[arg].type);
-       if (!btf_type_is_ptr(t) || !t->type) {
-               /* anything but the pointer to struct is a helper config bug */
-               bpf_log(log, "ARG_PTR_TO_BTF is misconfigured\n");
-               return -EFAULT;
-       }
-       btf_id = t->type;
-       t = btf_type_by_id(btf_vmlinux, t->type);
-       /* skip modifiers */
-       while (btf_type_is_modifier(t)) {
-               btf_id = t->type;
-               t = btf_type_by_id(btf_vmlinux, t->type);
-       }
-       if (!btf_type_is_struct(t)) {
-               bpf_log(log, "ARG_PTR_TO_BTF is not a struct\n");
-               return -EFAULT;
-       }
-       bpf_log(log, "helper %s arg%d has btf_id %d struct %s\n", fnname + 4,
-               arg, btf_id, __btf_name_by_offset(btf_vmlinux, t->name_off));
-       return btf_id;
- }
  int btf_resolve_helper_id(struct bpf_verifier_log *log,
                          const struct bpf_func_proto *fn, int arg)
  {
-       int *btf_id = &fn->btf_id[arg];
-       int ret;
+       int id;
  
        if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID)
                return -EINVAL;
-       ret = READ_ONCE(*btf_id);
-       if (ret)
-               return ret;
-       /* ok to race the search. The result is the same */
-       ret = __btf_resolve_helper_id(log, fn->func, arg);
-       if (!ret) {
-               /* Function argument cannot be type 'void' */
-               bpf_log(log, "BTF resolution bug\n");
-               return -EFAULT;
-       }
-       WRITE_ONCE(*btf_id, ret);
-       return ret;
+       id = fn->btf_id[arg];
+       if (!id || id > btf_vmlinux->nr_types)
+               return -EINVAL;
+       return id;
  }
  
  static int __get_type_size(struct btf *btf, u32 btf_id,
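
The BTF_ID_LIST()/BTF_ID() pair introduced above replaces the old
runtime kallsyms/BTF search: each BTF_ID() reserves a u32 slot in a
dedicated ELF section, and the new resolve_btfids host tool patches
the real BTF type IDs into the image at build time. Macro usage
follows the pattern in this diff (the list name below is
illustrative):

  #include <linux/btf_ids.h>

  /* one u32 slot per BTF_ID(), filled in by resolve_btfids */
  BTF_ID_LIST(demo_btf_ids)
  BTF_ID(struct, sk_buff)
  BTF_ID(struct, xdp_buff)

  /* after the build step, demo_btf_ids[0] holds the BTF type ID of
   * struct sk_buff, demo_btf_ids[1] that of struct xdp_buff */

This is also why btf_resolve_helper_id() shrinks to a bounds check on
fn->btf_id[arg]: the IDs are resolved before the kernel ever runs.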
diff --combined kernel/bpf/syscall.c
@@@ -1981,6 -1981,7 +1981,7 @@@ bpf_prog_load_check_attach(enum bpf_pro
        case BPF_PROG_TYPE_CGROUP_SOCK:
                switch (expected_attach_type) {
                case BPF_CGROUP_INET_SOCK_CREATE:
+               case BPF_CGROUP_INET_SOCK_RELEASE:
                case BPF_CGROUP_INET4_POST_BIND:
                case BPF_CGROUP_INET6_POST_BIND:
                        return 0;
@@@ -2121,7 -2122,7 +2122,7 @@@ static int bpf_prog_load(union bpf_att
            !bpf_capable())
                return -EPERM;
  
 -      if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN))
 +      if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN) && !capable(CAP_SYS_ADMIN))
                return -EPERM;
        if (is_perfmon_prog_type(type) && !perfmon_capable())
                return -EPERM;
@@@ -2779,6 -2780,7 +2780,7 @@@ attach_type_to_prog_type(enum bpf_attac
                return BPF_PROG_TYPE_CGROUP_SKB;
                break;
        case BPF_CGROUP_INET_SOCK_CREATE:
+       case BPF_CGROUP_INET_SOCK_RELEASE:
        case BPF_CGROUP_INET4_POST_BIND:
        case BPF_CGROUP_INET6_POST_BIND:
                return BPF_PROG_TYPE_CGROUP_SOCK;
@@@ -2893,11 -2895,13 +2895,11 @@@ static int bpf_prog_detach(const union 
        switch (ptype) {
        case BPF_PROG_TYPE_SK_MSG:
        case BPF_PROG_TYPE_SK_SKB:
 -              return sock_map_get_from_fd(attr, NULL);
 +              return sock_map_prog_detach(attr, ptype);
        case BPF_PROG_TYPE_LIRC_MODE2:
                return lirc_prog_detach(attr);
        case BPF_PROG_TYPE_FLOW_DISSECTOR:
 -              if (!capable(CAP_NET_ADMIN))
 -                      return -EPERM;
 -              return netns_bpf_prog_detach(attr);
 +              return netns_bpf_prog_detach(attr, ptype);
        case BPF_PROG_TYPE_CGROUP_DEVICE:
        case BPF_PROG_TYPE_CGROUP_SKB:
        case BPF_PROG_TYPE_CGROUP_SOCK:
@@@ -2927,6 -2931,7 +2929,7 @@@ static int bpf_prog_query(const union b
        case BPF_CGROUP_INET_INGRESS:
        case BPF_CGROUP_INET_EGRESS:
        case BPF_CGROUP_INET_SOCK_CREATE:
+       case BPF_CGROUP_INET_SOCK_RELEASE:
        case BPF_CGROUP_INET4_BIND:
        case BPF_CGROUP_INET6_BIND:
        case BPF_CGROUP_INET4_POST_BIND:
@@@ -3137,8 -3142,7 +3140,8 @@@ static const struct bpf_map *bpf_map_fr
        return NULL;
  }
  
 -static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog)
 +static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
 +                                            const struct cred *f_cred)
  {
        const struct bpf_map *map;
        struct bpf_insn *insns;
                    code == (BPF_JMP | BPF_CALL_ARGS)) {
                        if (code == (BPF_JMP | BPF_CALL_ARGS))
                                insns[i].code = BPF_JMP | BPF_CALL;
 -                      if (!bpf_dump_raw_ok())
 +                      if (!bpf_dump_raw_ok(f_cred))
                                insns[i].imm = 0;
                        continue;
                }
@@@ -3220,8 -3224,7 +3223,8 @@@ static int set_info_rec_size(struct bpf
        return 0;
  }
  
 -static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 +static int bpf_prog_get_info_by_fd(struct file *file,
 +                                 struct bpf_prog *prog,
                                   const union bpf_attr *attr,
                                   union bpf_attr __user *uattr)
  {
                struct bpf_insn *insns_sanitized;
                bool fault;
  
 -              if (prog->blinded && !bpf_dump_raw_ok()) {
 +              if (prog->blinded && !bpf_dump_raw_ok(file->f_cred)) {
                        info.xlated_prog_insns = 0;
                        goto done;
                }
 -              insns_sanitized = bpf_insn_prepare_dump(prog);
 +              insns_sanitized = bpf_insn_prepare_dump(prog, file->f_cred);
                if (!insns_sanitized)
                        return -ENOMEM;
                uinsns = u64_to_user_ptr(info.xlated_prog_insns);
        }
  
        if (info.jited_prog_len && ulen) {
 -              if (bpf_dump_raw_ok()) {
 +              if (bpf_dump_raw_ok(file->f_cred)) {
                        uinsns = u64_to_user_ptr(info.jited_prog_insns);
                        ulen = min_t(u32, info.jited_prog_len, ulen);
  
        ulen = info.nr_jited_ksyms;
        info.nr_jited_ksyms = prog->aux->func_cnt ? : 1;
        if (ulen) {
 -              if (bpf_dump_raw_ok()) {
 +              if (bpf_dump_raw_ok(file->f_cred)) {
                        unsigned long ksym_addr;
                        u64 __user *user_ksyms;
                        u32 i;
        ulen = info.nr_jited_func_lens;
        info.nr_jited_func_lens = prog->aux->func_cnt ? : 1;
        if (ulen) {
 -              if (bpf_dump_raw_ok()) {
 +              if (bpf_dump_raw_ok(file->f_cred)) {
                        u32 __user *user_lens;
                        u32 func_len, i;
  
        else
                info.nr_jited_line_info = 0;
        if (info.nr_jited_line_info && ulen) {
 -              if (bpf_dump_raw_ok()) {
 +              if (bpf_dump_raw_ok(file->f_cred)) {
                        __u64 __user *user_linfo;
                        u32 i;
  
@@@ -3497,8 -3500,7 +3500,8 @@@ done
        return 0;
  }
  
 -static int bpf_map_get_info_by_fd(struct bpf_map *map,
 +static int bpf_map_get_info_by_fd(struct file *file,
 +                                struct bpf_map *map,
                                  const union bpf_attr *attr,
                                  union bpf_attr __user *uattr)
  {
        return 0;
  }
  
 -static int bpf_btf_get_info_by_fd(struct btf *btf,
 +static int bpf_btf_get_info_by_fd(struct file *file,
 +                                struct btf *btf,
                                  const union bpf_attr *attr,
                                  union bpf_attr __user *uattr)
  {
        return btf_get_info_by_fd(btf, attr, uattr);
  }
  
 -static int bpf_link_get_info_by_fd(struct bpf_link *link,
 +static int bpf_link_get_info_by_fd(struct file *file,
 +                                struct bpf_link *link,
                                  const union bpf_attr *attr,
                                  union bpf_attr __user *uattr)
  {
@@@ -3611,15 -3611,15 +3614,15 @@@ static int bpf_obj_get_info_by_fd(cons
                return -EBADFD;
  
        if (f.file->f_op == &bpf_prog_fops)
 -              err = bpf_prog_get_info_by_fd(f.file->private_data, attr,
 +              err = bpf_prog_get_info_by_fd(f.file, f.file->private_data, attr,
                                              uattr);
        else if (f.file->f_op == &bpf_map_fops)
 -              err = bpf_map_get_info_by_fd(f.file->private_data, attr,
 +              err = bpf_map_get_info_by_fd(f.file, f.file->private_data, attr,
                                             uattr);
        else if (f.file->f_op == &btf_fops)
 -              err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr);
 +              err = bpf_btf_get_info_by_fd(f.file, f.file->private_data, attr, uattr);
        else if (f.file->f_op == &bpf_link_fops)
 -              err = bpf_link_get_info_by_fd(f.file->private_data,
 +              err = bpf_link_get_info_by_fd(f.file, f.file->private_data,
                                              attr, uattr);
        else
                err = -EINVAL;
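
With the f_cred plumbing above, bpf_dump_raw_ok() checks the
credentials of whoever opened the fd rather than the current task, so
a privileged fd passed to a lesser-privileged process no longer leaks
raw addresses. The userspace query is unchanged; a rough sketch
(error handling elided, prog_fd assumed valid):

  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/bpf.h>

  static int prog_info(int prog_fd, struct bpf_prog_info *info)
  {
          union bpf_attr attr;

          memset(&attr, 0, sizeof(attr));
          attr.info.bpf_fd = prog_fd;
          attr.info.info_len = sizeof(*info);
          attr.info.info = (__u64)(unsigned long)info;
          /* address-revealing fields in *info come back sanitized
           * unless the fd opener's creds pass bpf_dump_raw_ok() */
          return syscall(__NR_bpf, BPF_OBJ_GET_INFO_BY_FD,
                         &attr, sizeof(attr));
  }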
diff --combined net/core/filter.c
@@@ -75,6 -75,7 +75,7 @@@
  #include <net/ipv6_stubs.h>
  #include <net/bpf_sk_storage.h>
  #include <net/transp_v6.h>
+ #include <linux/btf_ids.h>
  
  /**
   *    sk_filter_trim_cap - run a packet through a socket filter
@@@ -3779,7 -3780,9 +3780,9 @@@ static const struct bpf_func_proto bpf_
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
  };
  
- static int bpf_skb_output_btf_ids[5];
+ BTF_ID_LIST(bpf_skb_output_btf_ids)
+ BTF_ID(struct, sk_buff)
  const struct bpf_func_proto bpf_skb_output_proto = {
        .func           = bpf_skb_event_output,
        .gpl_only       = true,
@@@ -4173,7 -4176,9 +4176,9 @@@ static const struct bpf_func_proto bpf_
        .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
  };
  
- static int bpf_xdp_output_btf_ids[5];
+ BTF_ID_LIST(bpf_xdp_output_btf_ids)
+ BTF_ID(struct, xdp_buff)
  const struct bpf_func_proto bpf_xdp_output_proto = {
        .func           = bpf_xdp_event_output,
        .gpl_only       = true,
@@@ -5889,16 -5894,12 +5894,16 @@@ BPF_CALL_1(bpf_skb_ecn_set_ce, struct s
  {
        unsigned int iphdr_len;
  
 -      if (skb->protocol == cpu_to_be16(ETH_P_IP))
 +      switch (skb_protocol(skb, true)) {
 +      case cpu_to_be16(ETH_P_IP):
                iphdr_len = sizeof(struct iphdr);
 -      else if (skb->protocol == cpu_to_be16(ETH_P_IPV6))
 +              break;
 +      case cpu_to_be16(ETH_P_IPV6):
                iphdr_len = sizeof(struct ipv6hdr);
 -      else
 +              break;
 +      default:
                return 0;
 +      }
  
        if (skb_headlen(skb) < iphdr_len)
                return 0;
@@@ -6894,6 -6895,7 +6899,7 @@@ static bool __sock_filter_check_attach_
        case offsetof(struct bpf_sock, priority):
                switch (attach_type) {
                case BPF_CGROUP_INET_SOCK_CREATE:
+               case BPF_CGROUP_INET_SOCK_RELEASE:
                        goto full_access;
                default:
                        return false;
diff --combined net/xdp/xsk_buff_pool.c
@@@ -2,6 -2,9 +2,6 @@@
  
  #include <net/xsk_buff_pool.h>
  #include <net/xdp_sock.h>
 -#include <linux/dma-direct.h>
 -#include <linux/dma-noncoherent.h>
 -#include <linux/swiotlb.h>
  
  #include "xsk_queue.h"
  
@@@ -52,6 -55,7 +52,6 @@@ struct xsk_buff_pool *xp_create(struct 
        pool->free_heads_cnt = chunks;
        pool->headroom = headroom;
        pool->chunk_size = chunk_size;
 -      pool->cheap_dma = true;
        pool->unaligned = unaligned;
        pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM;
        INIT_LIST_HEAD(&pool->free_list);
@@@ -121,6 -125,48 +121,6 @@@ static void xp_check_dma_contiguity(str
        }
  }
  
 -static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool)
 -{
 -#if defined(CONFIG_SWIOTLB)
 -      phys_addr_t paddr;
 -      u32 i;
 -
 -      for (i = 0; i < pool->dma_pages_cnt; i++) {
 -              paddr = dma_to_phys(pool->dev, pool->dma_pages[i]);
 -              if (is_swiotlb_buffer(paddr))
 -                      return false;
 -      }
 -#endif
 -      return true;
 -}
 -
 -static bool xp_check_cheap_dma(struct xsk_buff_pool *pool)
 -{
 -#if defined(CONFIG_HAS_DMA)
 -      const struct dma_map_ops *ops = get_dma_ops(pool->dev);
 -
 -      if (ops) {
 -              return !ops->sync_single_for_cpu &&
 -                      !ops->sync_single_for_device;
 -      }
 -
 -      if (!dma_is_direct(ops))
 -              return false;
 -
 -      if (!xp_check_swiotlb_dma(pool))
 -              return false;
 -
 -      if (!dev_is_dma_coherent(pool->dev)) {
 -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) ||              \
 -      defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) ||        \
 -      defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE)
 -              return false;
 -#endif
 -      }
 -#endif
 -      return true;
 -}
 -
  int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
               unsigned long attrs, struct page **pages, u32 nr_pages)
  {
  
        pool->dev = dev;
        pool->dma_pages_cnt = nr_pages;
 +      pool->dma_need_sync = false;
  
        for (i = 0; i < pool->dma_pages_cnt; i++) {
                dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
                        xp_dma_unmap(pool, attrs);
                        return -ENOMEM;
                }
 +              if (dma_need_sync(dev, dma))
 +                      pool->dma_need_sync = true;
                pool->dma_pages[i] = dma;
        }
  
        if (pool->unaligned)
                xp_check_dma_contiguity(pool);
 -
 -      pool->dev = dev;
 -      pool->cheap_dma = xp_check_cheap_dma(pool);
        return 0;
  }
  EXPORT_SYMBOL(xp_dma_map);
@@@ -189,6 -235,7 +189,7 @@@ static struct xdp_buff_xsk *__xp_alloc(
  
        for (;;) {
                if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
+                       pool->fq->queue_empty_descs++;
                        xp_release(xskb);
                        return NULL;
                }
@@@ -234,7 -281,7 +235,7 @@@ struct xdp_buff *xp_alloc(struct xsk_bu
        xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
        xskb->xdp.data_meta = xskb->xdp.data;
  
 -      if (!pool->cheap_dma) {
 +      if (pool->dma_need_sync) {
                dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
                                                 pool->frame_len,
                                                 DMA_BIDIRECTIONAL);
diff --combined tools/include/uapi/linux/bpf.h
@@@ -226,6 -226,7 +226,7 @@@ enum bpf_attach_type 
        BPF_CGROUP_INET4_GETSOCKNAME,
        BPF_CGROUP_INET6_GETSOCKNAME,
        BPF_XDP_DEVMAP,
+       BPF_CGROUP_INET_SOCK_RELEASE,
        __MAX_BPF_ATTACH_TYPE
  };
  
@@@ -3174,12 -3175,13 +3175,12 @@@ union bpf_attr 
   * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
   *    Description
   *            Copy *size* bytes from *data* into a ring buffer *ringbuf*.
 - *            If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
 - *            new data availability is sent.
 - *            IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
 - *            new data availability is sent unconditionally.
 + *            If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
 + *            of new data availability is sent.
 + *            If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
 + *            of new data availability is sent unconditionally.
   *    Return
 - *            0, on success;
 - *            < 0, on error.
 + *            0 on success, or a negative error in case of failure.
   *
   * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
   *    Description
   * void bpf_ringbuf_submit(void *data, u64 flags)
   *    Description
   *            Submit reserved ring buffer sample, pointed to by *data*.
 - *            If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
 - *            new data availability is sent.
 - *            IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
 - *            new data availability is sent unconditionally.
 + *            If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
 + *            of new data availability is sent.
 + *            If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
 + *            of new data availability is sent unconditionally.
   *    Return
   *            Nothing. Always succeeds.
   *
   * void bpf_ringbuf_discard(void *data, u64 flags)
   *    Description
   *            Discard reserved ring buffer sample, pointed to by *data*.
 - *            If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
 - *            new data availability is sent.
 - *            IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
 - *            new data availability is sent unconditionally.
 + *            If **BPF_RB_NO_WAKEUP** is specified in *flags*, no notification
 + *            of new data availability is sent.
 + *            If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
 + *            of new data availability is sent unconditionally.
   *    Return
   *            Nothing. Always succeeds.
   *
   *    Description
   *            Query various characteristics of provided ring buffer. What
   *            exactly is queries is determined by *flags*:
 - *              - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
 - *              - BPF_RB_RING_SIZE - the size of ring buffer;
 - *              - BPF_RB_CONS_POS - consumer position (can wrap around);
 - *              - BPF_RB_PROD_POS - producer(s) position (can wrap around);
 - *            Data returned is just a momentary snapshots of actual values
 + *
 + *            * **BPF_RB_AVAIL_DATA**: Amount of data not yet consumed.
 + *            * **BPF_RB_RING_SIZE**: The size of ring buffer.
 + *            * **BPF_RB_CONS_POS**: Consumer position (can wrap around).
 + *            * **BPF_RB_PROD_POS**: Producer(s) position (can wrap around).
 + *
 + *            Data returned is just a momentary snapshot of actual values
   *            and could be inaccurate, so this facility should be used to
   *            power heuristics and for reporting, not to make 100% correct
   *            calculation.
   *    Return
 - *            Requested value, or 0, if flags are not recognized.
 + *            Requested value, or 0, if *flags* are not recognized.
   *
   * long bpf_csum_level(struct sk_buff *skb, u64 level)
   *    Description
diff --combined tools/lib/bpf/libbpf.c
@@@ -2338,18 -2338,23 +2338,23 @@@ static bool section_have_execinstr(stru
        return false;
  }
  
- static void bpf_object__sanitize_btf(struct bpf_object *obj)
+ static bool btf_needs_sanitization(struct bpf_object *obj)
+ {
+       bool has_func_global = obj->caps.btf_func_global;
+       bool has_datasec = obj->caps.btf_datasec;
+       bool has_func = obj->caps.btf_func;
+       return !has_func || !has_datasec || !has_func_global;
+ }
+ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
  {
        bool has_func_global = obj->caps.btf_func_global;
        bool has_datasec = obj->caps.btf_datasec;
        bool has_func = obj->caps.btf_func;
-       struct btf *btf = obj->btf;
        struct btf_type *t;
        int i, j, vlen;
  
-       if (!obj->btf || (has_func && has_datasec && has_func_global))
-               return;
        for (i = 1; i <= btf__get_nr_types(btf); i++) {
                t = (struct btf_type *)btf__type_by_id(btf, i);
  
        }
  }
  
- static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
- {
-       if (!obj->btf_ext)
-               return;
-       if (!obj->caps.btf_func) {
-               btf_ext__free(obj->btf_ext);
-               obj->btf_ext = NULL;
-       }
- }
  static bool libbpf_needs_btf(const struct bpf_object *obj)
  {
        return obj->efile.btf_maps_shndx >= 0 ||
@@@ -2473,19 -2467,11 +2467,11 @@@ static int bpf_object__finalize_btf(str
                return 0;
  
        err = btf__finalize_data(obj, obj->btf);
-       if (!err)
-               return 0;
-       pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
-       btf__free(obj->btf);
-       obj->btf = NULL;
-       btf_ext__free(obj->btf_ext);
-       obj->btf_ext = NULL;
-       if (libbpf_needs_btf(obj)) {
-               pr_warn("BTF is required, but is missing or corrupted.\n");
-               return -ENOENT;
+       if (err) {
+               pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
+               return err;
        }
        return 0;
  }
  
@@@ -2538,30 -2524,45 +2524,45 @@@ static int bpf_object__load_vmlinux_btf
  
  static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
  {
+       struct btf *kern_btf = obj->btf;
+       bool btf_mandatory, sanitize;
        int err = 0;
  
        if (!obj->btf)
                return 0;
  
-       bpf_object__sanitize_btf(obj);
-       bpf_object__sanitize_btf_ext(obj);
+       sanitize = btf_needs_sanitization(obj);
+       if (sanitize) {
+               const void *raw_data;
+               __u32 sz;
  
-       err = btf__load(obj->btf);
-       if (err) {
-               pr_warn("Error loading %s into kernel: %d.\n",
-                       BTF_ELF_SEC, err);
-               btf__free(obj->btf);
-               obj->btf = NULL;
-               /* btf_ext can't exist without btf, so free it as well */
-               if (obj->btf_ext) {
-                       btf_ext__free(obj->btf_ext);
-                       obj->btf_ext = NULL;
-               }
+               /* clone BTF to sanitize a copy and leave the original intact */
+               raw_data = btf__get_raw_data(obj->btf, &sz);
+               kern_btf = btf__new(raw_data, sz);
+               if (IS_ERR(kern_btf))
+                       return PTR_ERR(kern_btf);
  
-               if (kernel_needs_btf(obj))
-                       return err;
+               bpf_object__sanitize_btf(obj, kern_btf);
        }
-       return 0;
+       err = btf__load(kern_btf);
+       if (sanitize) {
+               if (!err) {
+                       /* move fd to libbpf's BTF */
+                       btf__set_fd(obj->btf, btf__fd(kern_btf));
+                       btf__set_fd(kern_btf, -1);
+               }
+               btf__free(kern_btf);
+       }
+       if (err) {
+               btf_mandatory = kernel_needs_btf(obj);
+               pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
+                       btf_mandatory ? "BTF is mandatory, can't proceed."
+                                     : "BTF is optional, ignoring.");
+               if (!btf_mandatory)
+                       err = 0;
+       }
+       return err;
  }
  
  static int bpf_object__elf_collect(struct bpf_object *obj)
@@@ -3785,7 -3786,7 +3786,7 @@@ static int bpf_object__create_map(struc
        create_attr.btf_fd = 0;
        create_attr.btf_key_type_id = 0;
        create_attr.btf_value_type_id = 0;
-       if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
+       if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {
                create_attr.btf_fd = btf__fd(obj->btf);
                create_attr.btf_key_type_id = map->btf_key_type_id;
                create_attr.btf_value_type_id = map->btf_value_type_id;
@@@ -4977,13 -4978,7 +4978,13 @@@ bpf_core_reloc_fields(struct bpf_objec
                        err = -EINVAL;
                        goto out;
                }
 -              prog = bpf_object__find_program_by_title(obj, sec_name);
 +              prog = NULL;
 +              for (i = 0; i < obj->nr_programs; i++) {
 +                      if (!strcmp(obj->programs[i].section_name, sec_name)) {
 +                              prog = &obj->programs[i];
 +                              break;
 +                      }
 +              }
                if (!prog) {
                        pr_warn("failed to find program '%s' for CO-RE offset relocation\n",
                                sec_name);
@@@ -5375,18 -5370,17 +5376,17 @@@ load_program(struct bpf_program *prog, 
                load_attr.kern_version = kern_version;
                load_attr.prog_ifindex = prog->prog_ifindex;
        }
-       /* if .BTF.ext was loaded, kernel supports associated BTF for prog */
-       if (prog->obj->btf_ext)
-               btf_fd = bpf_object__btf_fd(prog->obj);
-       else
-               btf_fd = -1;
-       load_attr.prog_btf_fd = btf_fd >= 0 ? btf_fd : 0;
-       load_attr.func_info = prog->func_info;
-       load_attr.func_info_rec_size = prog->func_info_rec_size;
-       load_attr.func_info_cnt = prog->func_info_cnt;
-       load_attr.line_info = prog->line_info;
-       load_attr.line_info_rec_size = prog->line_info_rec_size;
-       load_attr.line_info_cnt = prog->line_info_cnt;
+       /* specify func_info/line_info only if kernel supports them */
+       btf_fd = bpf_object__btf_fd(prog->obj);
+       if (btf_fd >= 0 && prog->obj->caps.btf_func) {
+               load_attr.prog_btf_fd = btf_fd;
+               load_attr.func_info = prog->func_info;
+               load_attr.func_info_rec_size = prog->func_info_rec_size;
+               load_attr.func_info_cnt = prog->func_info_cnt;
+               load_attr.line_info = prog->line_info;
+               load_attr.line_info_rec_size = prog->line_info_rec_size;
+               load_attr.line_info_cnt = prog->line_info_cnt;
+       }
        load_attr.log_level = prog->log_level;
        load_attr.prog_flags = prog->prog_flags;
  
@@@ -6910,7 -6904,7 +6910,7 @@@ static const struct bpf_sec_def section
                .expected_attach_type = BPF_TRACE_ITER,
                .is_attach_btf = true,
                .attach_fn = attach_iter),
 -      BPF_EAPROG_SEC("xdp_devmap",            BPF_PROG_TYPE_XDP,
 +      BPF_EAPROG_SEC("xdp_devmap/",           BPF_PROG_TYPE_XDP,
                                                BPF_XDP_DEVMAP),
        BPF_PROG_SEC("xdp",                     BPF_PROG_TYPE_XDP),
        BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
        BPF_APROG_SEC("cgroup_skb/egress",      BPF_PROG_TYPE_CGROUP_SKB,
                                                BPF_CGROUP_INET_EGRESS),
        BPF_APROG_COMPAT("cgroup/skb",          BPF_PROG_TYPE_CGROUP_SKB),
+       BPF_EAPROG_SEC("cgroup/sock_create",    BPF_PROG_TYPE_CGROUP_SOCK,
+                                               BPF_CGROUP_INET_SOCK_CREATE),
+       BPF_EAPROG_SEC("cgroup/sock_release",   BPF_PROG_TYPE_CGROUP_SOCK,
+                                               BPF_CGROUP_INET_SOCK_RELEASE),
        BPF_APROG_SEC("cgroup/sock",            BPF_PROG_TYPE_CGROUP_SOCK,
                                                BPF_CGROUP_INET_SOCK_CREATE),
        BPF_EAPROG_SEC("cgroup/post_bind4",     BPF_PROG_TYPE_CGROUP_SOCK,
@@@ -8588,7 -8586,7 +8592,7 @@@ static struct perf_buffer *__perf_buffe
                                              struct perf_buffer_params *p)
  {
        const char *online_cpus_file = "/sys/devices/system/cpu/online";
-       struct bpf_map_info map = {};
+       struct bpf_map_info map;
        char msg[STRERR_BUFSIZE];
        struct perf_buffer *pb;
        bool *online = NULL;
                return ERR_PTR(-EINVAL);
        }
  
+       /* best-effort sanity checks */
+       memset(&map, 0, sizeof(map));
        map_info_len = sizeof(map);
        err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
        if (err) {
                err = -errno;
-               pr_warn("failed to get map info for map FD %d: %s\n",
-                       map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
-               return ERR_PTR(err);
-       }
-       if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
-               pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
-                       map.name);
-               return ERR_PTR(-EINVAL);
+               /* if BPF_OBJ_GET_INFO_BY_FD is supported, will return
+                * -EBADFD, -EFAULT, or -E2BIG on real error
+                */
+               if (err != -EINVAL) {
+                       pr_warn("failed to get map info for map FD %d: %s\n",
+                               map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
+                       return ERR_PTR(err);
+               }
+               pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
+                        map_fd);
+       } else {
+               if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+                       pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+                               map.name);
+                       return ERR_PTR(-EINVAL);
+               }
        }
  
        pb = calloc(1, sizeof(*pb));
                        err = pb->cpu_cnt;
                        goto error;
                }
-               if (map.max_entries < pb->cpu_cnt)
+               if (map.max_entries && map.max_entries < pb->cpu_cnt)
                        pb->cpu_cnt = map.max_entries;
        }