// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <linux/version.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vsprintf() in __base_pr() uses a nonliteral format string. This may break
 * compilation if the user enables the corresponding warning, so disable it
 * explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);

static const char * const attach_type_name[] = {
        [BPF_CGROUP_INET_INGRESS]       = "cgroup_inet_ingress",
        [BPF_CGROUP_INET_EGRESS]        = "cgroup_inet_egress",
        [BPF_CGROUP_INET_SOCK_CREATE]   = "cgroup_inet_sock_create",
        [BPF_CGROUP_INET_SOCK_RELEASE]  = "cgroup_inet_sock_release",
        [BPF_CGROUP_SOCK_OPS]           = "cgroup_sock_ops",
        [BPF_CGROUP_DEVICE]             = "cgroup_device",
        [BPF_CGROUP_INET4_BIND]         = "cgroup_inet4_bind",
        [BPF_CGROUP_INET6_BIND]         = "cgroup_inet6_bind",
        [BPF_CGROUP_INET4_CONNECT]      = "cgroup_inet4_connect",
        [BPF_CGROUP_INET6_CONNECT]      = "cgroup_inet6_connect",
        [BPF_CGROUP_INET4_POST_BIND]    = "cgroup_inet4_post_bind",
        [BPF_CGROUP_INET6_POST_BIND]    = "cgroup_inet6_post_bind",
        [BPF_CGROUP_INET4_GETPEERNAME]  = "cgroup_inet4_getpeername",
        [BPF_CGROUP_INET6_GETPEERNAME]  = "cgroup_inet6_getpeername",
        [BPF_CGROUP_INET4_GETSOCKNAME]  = "cgroup_inet4_getsockname",
        [BPF_CGROUP_INET6_GETSOCKNAME]  = "cgroup_inet6_getsockname",
        [BPF_CGROUP_UDP4_SENDMSG]       = "cgroup_udp4_sendmsg",
        [BPF_CGROUP_UDP6_SENDMSG]       = "cgroup_udp6_sendmsg",
        [BPF_CGROUP_SYSCTL]             = "cgroup_sysctl",
        [BPF_CGROUP_UDP4_RECVMSG]       = "cgroup_udp4_recvmsg",
        [BPF_CGROUP_UDP6_RECVMSG]       = "cgroup_udp6_recvmsg",
        [BPF_CGROUP_GETSOCKOPT]         = "cgroup_getsockopt",
        [BPF_CGROUP_SETSOCKOPT]         = "cgroup_setsockopt",
        [BPF_SK_SKB_STREAM_PARSER]      = "sk_skb_stream_parser",
        [BPF_SK_SKB_STREAM_VERDICT]     = "sk_skb_stream_verdict",
        [BPF_SK_SKB_VERDICT]            = "sk_skb_verdict",
        [BPF_SK_MSG_VERDICT]            = "sk_msg_verdict",
        [BPF_LIRC_MODE2]                = "lirc_mode2",
        [BPF_FLOW_DISSECTOR]            = "flow_dissector",
        [BPF_TRACE_RAW_TP]              = "trace_raw_tp",
        [BPF_TRACE_FENTRY]              = "trace_fentry",
        [BPF_TRACE_FEXIT]               = "trace_fexit",
        [BPF_MODIFY_RETURN]             = "modify_return",
        [BPF_LSM_MAC]                   = "lsm_mac",
        [BPF_LSM_CGROUP]                = "lsm_cgroup",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
        [BPF_TRACE_ITER]                = "trace_iter",
        [BPF_XDP_DEVMAP]                = "xdp_devmap",
        [BPF_XDP_CPUMAP]                = "xdp_cpumap",
        [BPF_XDP]                       = "xdp",
        [BPF_SK_REUSEPORT_SELECT]       = "sk_reuseport_select",
        [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_reuseport_select_or_migrate",
        [BPF_PERF_EVENT]                = "perf_event",
        [BPF_TRACE_KPROBE_MULTI]        = "trace_kprobe_multi",
};

static const char * const link_type_name[] = {
        [BPF_LINK_TYPE_UNSPEC]                  = "unspec",
        [BPF_LINK_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_LINK_TYPE_TRACING]                 = "tracing",
        [BPF_LINK_TYPE_CGROUP]                  = "cgroup",
        [BPF_LINK_TYPE_ITER]                    = "iter",
        [BPF_LINK_TYPE_NETNS]                   = "netns",
        [BPF_LINK_TYPE_XDP]                     = "xdp",
        [BPF_LINK_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_LINK_TYPE_KPROBE_MULTI]            = "kprobe_multi",
        [BPF_LINK_TYPE_STRUCT_OPS]              = "struct_ops",
};

static const char * const map_type_name[] = {
        [BPF_MAP_TYPE_UNSPEC]                   = "unspec",
        [BPF_MAP_TYPE_HASH]                     = "hash",
        [BPF_MAP_TYPE_ARRAY]                    = "array",
        [BPF_MAP_TYPE_PROG_ARRAY]               = "prog_array",
        [BPF_MAP_TYPE_PERF_EVENT_ARRAY]         = "perf_event_array",
        [BPF_MAP_TYPE_PERCPU_HASH]              = "percpu_hash",
        [BPF_MAP_TYPE_PERCPU_ARRAY]             = "percpu_array",
        [BPF_MAP_TYPE_STACK_TRACE]              = "stack_trace",
        [BPF_MAP_TYPE_CGROUP_ARRAY]             = "cgroup_array",
        [BPF_MAP_TYPE_LRU_HASH]                 = "lru_hash",
        [BPF_MAP_TYPE_LRU_PERCPU_HASH]          = "lru_percpu_hash",
        [BPF_MAP_TYPE_LPM_TRIE]                 = "lpm_trie",
        [BPF_MAP_TYPE_ARRAY_OF_MAPS]            = "array_of_maps",
        [BPF_MAP_TYPE_HASH_OF_MAPS]             = "hash_of_maps",
        [BPF_MAP_TYPE_DEVMAP]                   = "devmap",
        [BPF_MAP_TYPE_DEVMAP_HASH]              = "devmap_hash",
        [BPF_MAP_TYPE_SOCKMAP]                  = "sockmap",
        [BPF_MAP_TYPE_CPUMAP]                   = "cpumap",
        [BPF_MAP_TYPE_XSKMAP]                   = "xskmap",
        [BPF_MAP_TYPE_SOCKHASH]                 = "sockhash",
        [BPF_MAP_TYPE_CGROUP_STORAGE]           = "cgroup_storage",
        [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY]      = "reuseport_sockarray",
        [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]    = "percpu_cgroup_storage",
        [BPF_MAP_TYPE_QUEUE]                    = "queue",
        [BPF_MAP_TYPE_STACK]                    = "stack",
        [BPF_MAP_TYPE_SK_STORAGE]               = "sk_storage",
        [BPF_MAP_TYPE_STRUCT_OPS]               = "struct_ops",
        [BPF_MAP_TYPE_RINGBUF]                  = "ringbuf",
        [BPF_MAP_TYPE_INODE_STORAGE]            = "inode_storage",
        [BPF_MAP_TYPE_TASK_STORAGE]             = "task_storage",
        [BPF_MAP_TYPE_BLOOM_FILTER]             = "bloom_filter",
        [BPF_MAP_TYPE_USER_RINGBUF]             = "user_ringbuf",
};

static const char * const prog_type_name[] = {
        [BPF_PROG_TYPE_UNSPEC]                  = "unspec",
        [BPF_PROG_TYPE_SOCKET_FILTER]           = "socket_filter",
        [BPF_PROG_TYPE_KPROBE]                  = "kprobe",
        [BPF_PROG_TYPE_SCHED_CLS]               = "sched_cls",
        [BPF_PROG_TYPE_SCHED_ACT]               = "sched_act",
        [BPF_PROG_TYPE_TRACEPOINT]              = "tracepoint",
        [BPF_PROG_TYPE_XDP]                     = "xdp",
        [BPF_PROG_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_PROG_TYPE_CGROUP_SKB]              = "cgroup_skb",
        [BPF_PROG_TYPE_CGROUP_SOCK]             = "cgroup_sock",
        [BPF_PROG_TYPE_LWT_IN]                  = "lwt_in",
        [BPF_PROG_TYPE_LWT_OUT]                 = "lwt_out",
        [BPF_PROG_TYPE_LWT_XMIT]                = "lwt_xmit",
        [BPF_PROG_TYPE_SOCK_OPS]                = "sock_ops",
        [BPF_PROG_TYPE_SK_SKB]                  = "sk_skb",
        [BPF_PROG_TYPE_CGROUP_DEVICE]           = "cgroup_device",
        [BPF_PROG_TYPE_SK_MSG]                  = "sk_msg",
        [BPF_PROG_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_PROG_TYPE_CGROUP_SOCK_ADDR]        = "cgroup_sock_addr",
        [BPF_PROG_TYPE_LWT_SEG6LOCAL]           = "lwt_seg6local",
        [BPF_PROG_TYPE_LIRC_MODE2]              = "lirc_mode2",
        [BPF_PROG_TYPE_SK_REUSEPORT]            = "sk_reuseport",
        [BPF_PROG_TYPE_FLOW_DISSECTOR]          = "flow_dissector",
        [BPF_PROG_TYPE_CGROUP_SYSCTL]           = "cgroup_sysctl",
        [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
        [BPF_PROG_TYPE_CGROUP_SOCKOPT]          = "cgroup_sockopt",
        [BPF_PROG_TYPE_TRACING]                 = "tracing",
        [BPF_PROG_TYPE_STRUCT_OPS]              = "struct_ops",
        [BPF_PROG_TYPE_EXT]                     = "ext",
        [BPF_PROG_TYPE_LSM]                     = "lsm",
        [BPF_PROG_TYPE_SK_LOOKUP]               = "sk_lookup",
        [BPF_PROG_TYPE_SYSCALL]                 = "syscall",
};

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn = __libbpf_pr;

        __libbpf_pr = fn;
        return old_print_fn;
}
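
/* Usage sketch (illustrative, not part of libbpf itself): route libbpf logs
 * through a custom callback and restore the previous one later:
 *
 *   static int my_print(enum libbpf_print_level level, const char *fmt,
 *                       va_list args)
 *   {
 *           if (level == LIBBPF_DEBUG)
 *                   return 0;
 *           return vfprintf(stderr, fmt, args);
 *   }
 *
 *   libbpf_print_fn_t old_fn = libbpf_set_print(my_print);
 *   ...
 *   libbpf_set_print(old_fn);
 */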

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;
        int old_errno;

        if (!__libbpf_pr)
                return;

        old_errno = errno;

        va_start(args, format);
        __libbpf_pr(level, format, args);
        va_end(args);

        errno = old_errno;
}

static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}
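
/* The suggested fix above amounts to raising RLIMIT_MEMLOCK, either via
 * 'ulimit -l' in the shell or programmatically (illustrative sketch):
 *
 *   struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
 *   setrlimit(RLIMIT_MEMLOCK, &r);
 *
 * Kernels v5.11+ account BPF memory via memcg instead, so this mostly
 * matters on older kernels.
 */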

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif
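
/* Both helpers make cleanup idempotent; illustrative usage mirroring this
 * file:
 *
 *   zfree(&prog->name);   // free() and NULL out, safe to call again
 *   zclose(prog->fd);     // close() only if fd >= 0, then reset fd to -1
 */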

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
        /* as of v1.0 libbpf_set_strict_mode() is a no-op */
        return 0;
}

__u32 libbpf_major_version(void)
{
        return LIBBPF_MAJOR_VERSION;
}

__u32 libbpf_minor_version(void)
{
        return LIBBPF_MINOR_VERSION;
}

const char *libbpf_version_string(void)
{
#define __S(X) #X
#define _S(X) __S(X)
        return  "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
#undef _S
#undef __S
}
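
/* Usage sketch (illustrative): report which libbpf the binary is linked
 * against:
 *
 *   printf("libbpf %s (%u.%u)\n", libbpf_version_string(),
 *          libbpf_major_version(), libbpf_minor_version());
 */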

enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN_VAR,
        RELO_EXTERN_FUNC,
        RELO_SUBPROG_ADDR,
        RELO_CORE,
};

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        union {
                const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
                struct {
                        int map_idx;
                        int sym_off;
                };
        };
};

/* stored as sec_def->cookie for all libbpf-supported SEC()s */
enum sec_def_flags {
        SEC_NONE = 0,
        /* expected_attach_type is optional; it can be omitted if the kernel
         * doesn't support it
         */
        SEC_EXP_ATTACH_OPT = 1,
        /* legacy, only used by libbpf_get_type_names() and
         * libbpf_attach_type_by_name(), not used by libbpf itself at all.
         * This used to be associated with cgroup (and a few other) BPF
         * programs that were attachable through the BPF_PROG_ATTACH command.
         * Pretty meaningless nowadays, though.
         */
        SEC_ATTACHABLE = 2,
        SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
        /* attachment target is specified through BTF ID in either kernel or
         * other BPF program's BTF object
         */
        SEC_ATTACH_BTF = 4,
        /* BPF program type allows sleeping/blocking in kernel */
        SEC_SLEEPABLE = 8,
        /* BPF program supports non-linear XDP buffers */
        SEC_XDP_FRAGS = 16,
};

struct bpf_sec_def {
        char *sec;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        long cookie;
        int handler_id;

        libbpf_prog_setup_fn_t prog_setup_fn;
        libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
        libbpf_prog_attach_fn_t prog_attach_fn;
};

/*
 * bpf_prog would be a better name, but it is already used in
 * linux/filter.h.
 */
struct bpf_program {
        char *name;
        char *sec_name;
        size_t sec_idx;
        const struct bpf_sec_def *sec_def;
        /* this program's instruction offset (in number of instructions)
         * within its containing ELF section
         */
        size_t sec_insn_off;
        /* number of original instructions in the ELF section belonging to
         * this program, not taking into account subprogram instructions
         * possibly appended later during relocation
         */
        size_t sec_insn_cnt;
        /* Offset (in number of instructions) of the start of instructions
         * belonging to this BPF program within its containing main BPF
         * program. For the entry-point (main) BPF program, this is always
         * zero. For a sub-program, this gets reset before each main BPF
         * program is processed and relocated, and is used to determine
         * whether the sub-program was already appended to the main program,
         * and if so, at which instruction offset.
         */
        size_t sub_insn_off;

        /* instructions that belong to BPF program; insns[0] is located at
         * sec_insn_off instruction within its ELF section in ELF file, so
         * when mapping ELF file instruction index to the local instruction,
         * one needs to subtract sec_insn_off; and vice versa.
         */
        struct bpf_insn *insns;
        /* actual number of instructions in this BPF program's image; for
         * entry-point BPF programs this includes the size of the main program
         * itself plus all the used sub-programs, appended at the end
         */
        size_t insns_cnt;

        struct reloc_desc *reloc_desc;
        int nr_reloc;

        /* BPF verifier log settings */
        char *log_buf;
        size_t log_size;
        __u32 log_level;

        struct bpf_object *obj;

        int fd;
        bool autoload;
        bool autoattach;
        bool mark_btf_static;
        enum bpf_prog_type type;
        enum bpf_attach_type expected_attach_type;

        int prog_ifindex;
        __u32 attach_btf_obj_fd;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;

        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};

struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops)
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

struct bpf_map_def {
        unsigned int type;
        unsigned int key_size;
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
};

struct bpf_map {
        struct bpf_object *obj;
        char *name;
        /* real_name is defined for special internal maps (.rodata*,
         * .data*, .bss, .kconfig) and preserves their original ELF section
         * name. This is important to be able to find the corresponding BTF
         * DATASEC information.
         */
        char *real_name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
        bool autocreate;
        __u64 map_extra;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_KCFG,
        EXT_KSYM,
};

enum kcfg_type {
        KCFG_UNKNOWN,
        KCFG_CHAR,
        KCFG_BOOL,
        KCFG_INT,
        KCFG_TRISTATE,
        KCFG_CHAR_ARR,
};

struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        bool is_set;
        bool is_weak;
        union {
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                struct {
                        unsigned long long addr;

                        /* target btf_id of the corresponding kernel var. */
                        int kernel_btf_obj_fd;
                        int kernel_btf_id;

                        /* local btf_id of the ksym extern's type. */
                        __u32 type_id;
                        /* BTF fd index to be patched in for insn->off, this is
                         * 0 for vmlinux BTF, index in obj->fd_array for module
                         * BTF
                         */
                        __s16 btf_fd_idx;
                } ksym;
        };
};

struct module_btf {
        struct btf *btf;
        char *name;
        __u32 id;
        int fd;
        int fd_array_idx;
};

enum sec_type {
        SEC_UNUSED = 0,
        SEC_RELO,
        SEC_BSS,
        SEC_DATA,
        SEC_RODATA,
};

struct elf_sec_desc {
        enum sec_type sec_type;
        Elf64_Shdr *shdr;
        Elf_Data *data;
};

struct elf_state {
        int fd;
        const void *obj_buf;
        size_t obj_buf_sz;
        Elf *elf;
        Elf64_Ehdr *ehdr;
        Elf_Data *symbols;
        Elf_Data *st_ops_data;
        size_t shstrndx; /* section index for section name strings */
        size_t strtabidx;
        struct elf_sec_desc *secs;
        size_t sec_cnt;
        int btf_maps_shndx;
        __u32 btf_maps_sec_btf_id;
        int text_shndx;
        int symbols_shndx;
        int st_ops_shndx;
};

struct usdt_manager;

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;

        bool loaded;
        bool has_subcalls;
        bool has_rodata;

        struct bpf_gen *gen_loader;

        /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
        struct elf_state efile;

        struct btf *btf;
        struct btf_ext *btf_ext;

        /* Parse and load BTF vmlinux if any of the programs in the object need
         * it at load time.
         */
        struct btf *btf_vmlinux;
        /* Path to the custom BTF to be used for BPF CO-RE relocations as an
         * override for vmlinux BTF.
         */
        char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
        struct module_btf *btf_modules;
        bool btf_modules_loaded;
        size_t btf_module_cnt;
        size_t btf_module_cap;

        /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
        char *log_buf;
        size_t log_size;
        __u32 log_level;

        int *fd_array;
        size_t fd_array_cap;
        size_t fd_array_cnt;

        struct usdt_manager *usdt_man;

        char path[];
};

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);

void bpf_program__unload(struct bpf_program *prog)
{
        if (!prog)
                return;

        zclose(prog->fd);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->sec_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->sec_idx = -1;
}

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
        return BPF_CLASS(insn->code) == BPF_JMP &&
               BPF_OP(insn->code) == BPF_CALL &&
               BPF_SRC(insn->code) == BPF_K &&
               insn->src_reg == BPF_PSEUDO_CALL &&
               insn->dst_reg == 0 &&
               insn->off == 0;
}

static bool is_call_insn(const struct bpf_insn *insn)
{
        return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
        return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
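
/* For reference (illustrative encodings matching the checks above): a
 * BPF-to-BPF call emitted by the compiler looks like
 *
 *   insn.code    = BPF_JMP | BPF_CALL;
 *   insn.src_reg = BPF_PSEUDO_CALL;
 *   insn.imm     = <callee offset relative to the next instruction>;
 *
 * while a helper call has src_reg == 0 and imm == <helper ID>, and a
 * BPF_PSEUDO_FUNC ldimm64 instruction loads the address of a subprog, e.g.
 * for callback-taking helpers like bpf_for_each_map_elem().
 */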

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                      const char *name, size_t sec_idx, const char *sec_name,
                      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
        if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
                pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
                        sec_name, name, sec_off, insn_data_sz);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));
        prog->obj = obj;

        prog->sec_idx = sec_idx;
        prog->sec_insn_off = sec_off / BPF_INSN_SZ;
        prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
        /* insns_cnt can later be increased by appending used subprograms */
        prog->insns_cnt = prog->sec_insn_cnt;

        prog->type = BPF_PROG_TYPE_UNSPEC;
        prog->fd = -1;

        /* libbpf's convention for SEC("?abc...") is that it's just like
         * SEC("abc...") but the corresponding bpf_program starts out with
         * autoload set to false.
         */
        if (sec_name[0] == '?') {
                prog->autoload = false;
                /* from now on forget there was ? in section name */
                sec_name++;
        } else {
                prog->autoload = true;
        }

        prog->autoattach = true;

        /* inherit object's log_level */
        prog->log_level = obj->log_level;

        prog->sec_name = strdup(sec_name);
        if (!prog->sec_name)
                goto errout;

        prog->name = strdup(name);
        if (!prog->name)
                goto errout;

        prog->insns = malloc(insn_data_sz);
        if (!prog->insns)
                goto errout;
        memcpy(prog->insns, insn_data, insn_data_sz);

        return 0;
errout:
        pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
        bpf_program__exit(prog);
        return -ENOMEM;
}
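
/* Illustrative example of the SEC("?...") convention handled above: a
 * program defined in BPF source as
 *
 *   SEC("?kprobe/do_unlinkat")
 *   int BPF_KPROBE(my_probe) { ... }
 *
 * starts out with autoload disabled and can be enabled selectively before
 * load with bpf_program__set_autoload(prog, true).
 */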

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
                         const char *sec_name, int sec_idx)
{
        Elf_Data *symbols = obj->efile.symbols;
        struct bpf_program *prog, *progs;
        void *data = sec_data->d_buf;
        size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
        int nr_progs, err, i;
        const char *name;
        Elf64_Sym *sym;

        progs = obj->programs;
        nr_progs = obj->nr_programs;
        nr_syms = symbols->d_size / sizeof(Elf64_Sym);
        sec_off = 0;

        for (i = 0; i < nr_syms; i++) {
                sym = elf_sym_by_idx(obj, i);

                if (sym->st_shndx != sec_idx)
                        continue;
                if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
                        continue;

                prog_sz = sym->st_size;
                sec_off = sym->st_value;

                name = elf_sym_str(obj, sym->st_name);
                if (!name) {
                        pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_off + prog_sz > sec_sz) {
                        pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
                        pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
                        return -ENOTSUP;
                }

                pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
                         sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

                progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
                if (!progs) {
                        /*
                         * In this case the original obj->programs
                         * is still valid, so no special handling is
                         * needed in bpf_object__close().
                         */
                        pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
                                sec_name, name);
                        return -ENOMEM;
                }
                obj->programs = progs;

                prog = &progs[nr_progs];

                err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
                                            sec_off, data + sec_off, prog_sz);
                if (err)
                        return err;

                /* if function is a global/weak symbol, but has restricted
                 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
                 * as static to enable more permissive BPF verification mode
                 * with more outside context available to BPF verifier
                 */
                if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL
                    && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
                        || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
                        prog->mark_btf_static = true;

                nr_progs++;
                obj->nr_programs = nr_progs;
        }

        return 0;
}

__u32 get_kernel_version(void)
{
        /* On Ubuntu, LINUX_VERSION_CODE doesn't correspond to info.release,
         * but Ubuntu provides the /proc/version_signature file, as described
         * at https://ubuntu.com/kernel, with example contents below, which we
         * can use to get a proper LINUX_VERSION_CODE.
         *
         *   Ubuntu 5.4.0-12.15-generic 5.4.8
         *
         * In the above, 5.4.8 is what the kernel is actually expecting, while
         * uname() will return 5.4.0 in info.release.
         */
        const char *ubuntu_kver_file = "/proc/version_signature";
        __u32 major, minor, patch;
        struct utsname info;

        if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) == 0) {
                FILE *f;

                f = fopen(ubuntu_kver_file, "r");
                if (f) {
                        if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) {
                                fclose(f);
                                return KERNEL_VERSION(major, minor, patch);
                        }
                        fclose(f);
                }
                /* something went wrong, fall back to uname() approach */
        }

        uname(&info);
        if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)
                return 0;
        return KERNEL_VERSION(major, minor, patch);
}
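
/* KERNEL_VERSION() packs major/minor/patch into a single comparable value,
 * e.g. KERNEL_VERSION(5, 4, 8) == (5 << 16) + (4 << 8) + 8 == 0x050408
 * (recent kernels clamp the patch level at 255).
 */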

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (btf_member_bit_offset(t, i) == bit_offset)
                        return m;
        }

        return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
                    const char *name)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
                        return m;
        }

        return NULL;
}

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(const struct btf *btf, const char *tname,
                           const struct btf_type **type, __u32 *type_id,
                           const struct btf_type **vtype, __u32 *vtype_id,
                           const struct btf_member **data_member)
{
        const struct btf_type *kern_type, *kern_vtype;
        const struct btf_member *kern_data_member;
        __s32 kern_vtype_id, kern_type_id;
        __u32 i;

        kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
        if (kern_type_id < 0) {
                pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
                        tname);
                return kern_type_id;
        }
        kern_type = btf__type_by_id(btf, kern_type_id);

        /* Find the corresponding "map_value" type that will be used
         * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
         * find "struct bpf_struct_ops_tcp_congestion_ops" from the
         * btf_vmlinux.
         */
        kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
                                                tname, BTF_KIND_STRUCT);
        if (kern_vtype_id < 0) {
                pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
                        STRUCT_OPS_VALUE_PREFIX, tname);
                return kern_vtype_id;
        }
        kern_vtype = btf__type_by_id(btf, kern_vtype_id);

        /* Find "struct tcp_congestion_ops" from
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [ ... ]
         *      struct tcp_congestion_ops data;
         * }
         */
        kern_data_member = btf_members(kern_vtype);
        for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
                if (kern_data_member->type == kern_type_id)
                        break;
        }
        if (i == btf_vlen(kern_vtype)) {
                pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
                        tname, STRUCT_OPS_VALUE_PREFIX, tname);
                return -EINVAL;
        }

        *type = kern_type;
        *type_id = kern_type_id;
        *vtype = kern_vtype;
        *vtype_id = kern_vtype_id;
        *data_member = kern_data_member;

        return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
        return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
                                         const struct btf *btf,
                                         const struct btf *kern_btf)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_struct_ops *st_ops;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(kern_btf, tname,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        kern_data_off = kern_data_member->offset / 8;
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        prog = st_ops->progs[i];
                        if (!prog)
                                continue;

                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);

                        /* mtype->type must be a func_proto which was
                         * guaranteed in bpf_object__collect_st_ops_relos(),
                         * so only check kern_mtype for func_proto here.
                         */
                        if (!btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        size_t i;
        int err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!bpf_map__is_struct_ops(map))
                        continue;

                err = bpf_map__init_kern_struct_ops(map, obj->btf,
                                                    obj->btf_vmlinux);
                if (err)
                        return err;
        }

        return 0;
}

static int bpf_object__init_struct_ops_maps(struct bpf_object *obj)
{
        const struct btf_type *type, *datasec;
        const struct btf_var_secinfo *vsi;
        struct bpf_struct_ops *st_ops;
        const char *tname, *var_name;
        __s32 type_id, datasec_id;
        const struct btf *btf;
        struct bpf_map *map;
        __u32 i;

        if (obj->efile.st_ops_shndx == -1)
                return 0;

        btf = obj->btf;
        datasec_id = btf__find_by_name_kind(btf, STRUCT_OPS_SEC,
                                            BTF_KIND_DATASEC);
        if (datasec_id < 0) {
                pr_warn("struct_ops init: DATASEC %s not found\n",
                        STRUCT_OPS_SEC);
                return -EINVAL;
        }

        datasec = btf__type_by_id(btf, datasec_id);
        vsi = btf_var_secinfos(datasec);
        for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
                type = btf__type_by_id(obj->btf, vsi->type);
                var_name = btf__name_by_offset(obj->btf, type->name_off);

                type_id = btf__resolve_type(obj->btf, vsi->type);
                if (type_id < 0) {
                        pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
                                vsi->type, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                type = btf__type_by_id(obj->btf, type_id);
                tname = btf__name_by_offset(obj->btf, type->name_off);
                if (!tname[0]) {
                        pr_warn("struct_ops init: anonymous type is not supported\n");
                        return -ENOTSUP;
                }
                if (!btf_is_struct(type)) {
                        pr_warn("struct_ops init: %s is not a struct\n", tname);
                        return -EINVAL;
                }

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map->sec_idx = obj->efile.st_ops_shndx;
                map->sec_offset = vsi->offset;
                map->name = strdup(var_name);
                if (!map->name)
                        return -ENOMEM;

                map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
                map->def.key_size = sizeof(int);
                map->def.value_size = type->size;
                map->def.max_entries = 1;

                map->st_ops = calloc(1, sizeof(*map->st_ops));
                if (!map->st_ops)
                        return -ENOMEM;
                st_ops = map->st_ops;
                st_ops->data = malloc(type->size);
                st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
                st_ops->kern_func_off = malloc(btf_vlen(type) *
                                               sizeof(*st_ops->kern_func_off));
                if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
                        return -ENOMEM;

                if (vsi->offset + type->size > obj->efile.st_ops_data->d_size) {
                        pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
                                var_name, STRUCT_OPS_SEC);
                        return -EINVAL;
                }

                memcpy(st_ops->data,
                       obj->efile.st_ops_data->d_buf + vsi->offset,
                       type->size);
                st_ops->tname = tname;
                st_ops->type = type;
                st_ops->type_id = type_id;

                pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
                         tname, type_id, var_name, vsi->offset);
        }

        return 0;
}

static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
        } else {
                /* Using basename() GNU version which doesn't modify arg. */
                libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection to return
         * obj_buf to the user. If not, the buffer should be duplicated
         * to avoid the user freeing it before ELF processing finishes.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.st_ops_shndx = -1;
        obj->kconfig_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
        if (!obj->efile.elf)
                return;

        elf_end(obj->efile.elf);
        obj->efile.elf = NULL;
        obj->efile.symbols = NULL;
        obj->efile.st_ops_data = NULL;

        zfree(&obj->efile.secs);
        obj->efile.sec_cnt = 0;
        zclose(obj->efile.fd);
        obj->efile.obj_buf = NULL;
        obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
        Elf64_Ehdr *ehdr;
        int err = 0;
        Elf *elf;

        if (obj->efile.elf) {
                pr_warn("elf: init internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }

        if (obj->efile.obj_buf_sz > 0) {
                /* obj_buf should have been validated by bpf_object__open_mem(). */
                elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
                if (obj->efile.fd < 0) {
                        char errmsg[STRERR_BUFSIZE], *cp;

                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
                        return err;
                }

                elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
        }

        if (!elf) {
                pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }

        obj->efile.elf = elf;

        if (elf_kind(elf) != ELF_K_ELF) {
                err = -LIBBPF_ERRNO__FORMAT;
                pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
                goto errout;
        }

        if (gelf_getclass(elf) != ELFCLASS64) {
                err = -LIBBPF_ERRNO__FORMAT;
                pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
                goto errout;
        }

        obj->efile.ehdr = ehdr = elf64_getehdr(elf);
        if (!obj->efile.ehdr) {
                pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
                pr_warn("elf: failed to get section names section index for %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Elf is corrupted/truncated, avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
                pr_warn("elf: failed to get section names strings from %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Old LLVM set e_machine to EM_NONE */
        if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
                pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        return 0;
errout:
        bpf_object__elf_finish(obj);
        return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
                return 0;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
                return 0;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
        pr_warn("elf: endianness mismatch in %s.\n", obj->path);
        return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
        if (!data) {
                pr_warn("invalid license section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        /* libbpf_strlcpy() only copies the first N - 1 bytes, so size + 1
         * won't overrun the allowed ELF data section buffer
         */
        libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
        pr_debug("license of %s is %s\n", obj->path, obj->license);
        return 0;
}

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
        __u32 kver;

        if (!data || size != sizeof(kver)) {
                pr_warn("invalid kver section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        memcpy(&kver, data, sizeof(kver));
        obj->kern_version = kver;
        pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
        return 0;
}

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
            type == BPF_MAP_TYPE_HASH_OF_MAPS)
                return true;
        return false;
}
1445
1446 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1447 {
1448         Elf_Data *data;
1449         Elf_Scn *scn;
1450
1451         if (!name)
1452                 return -EINVAL;
1453
1454         scn = elf_sec_by_name(obj, name);
1455         data = elf_sec_data(obj, scn);
1456         if (data) {
1457                 *size = data->d_size;
1458                 return 0; /* found it */
1459         }
1460
1461         return -ENOENT;
1462 }
1463
1464 static int find_elf_var_offset(const struct bpf_object *obj, const char *name, __u32 *off)
1465 {
1466         Elf_Data *symbols = obj->efile.symbols;
1467         const char *sname;
1468         size_t si;
1469
1470         if (!name || !off)
1471                 return -EINVAL;
1472
1473         for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1474                 Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1475
1476                 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1477                         continue;
1478
1479                 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1480                     ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1481                         continue;
1482
1483                 sname = elf_sym_str(obj, sym->st_name);
1484                 if (!sname) {
1485                         pr_warn("failed to get sym name string for var %s\n", name);
1486                         return -EIO;
1487                 }
1488                 if (strcmp(name, sname) == 0) {
1489                         *off = sym->st_value;
1490                         return 0;
1491                 }
1492         }
1493
1494         return -ENOENT;
1495 }
1496
1497 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1498 {
1499         struct bpf_map *map;
1500         int err;
1501
1502         err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1503                                 sizeof(*obj->maps), obj->nr_maps + 1);
1504         if (err)
1505                 return ERR_PTR(err);
1506
1507         map = &obj->maps[obj->nr_maps++];
1508         map->obj = obj;
1509         map->fd = -1;
1510         map->inner_map_fd = -1;
1511         map->autocreate = true;
1512
1513         return map;
1514 }
1515
1516 static size_t bpf_map_mmap_sz(const struct bpf_map *map)
1517 {
1518         long page_sz = sysconf(_SC_PAGE_SIZE);
1519         size_t map_sz;
1520
1521         map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
1522         map_sz = roundup(map_sz, page_sz);
1523         return map_sz;
1524 }
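
/* Worked example (illustrative, not part of libbpf): for a hypothetical map
 * with value_size = 6 and max_entries = 1000 on a system with 4096-byte
 * pages, the value size is first rounded up to 8 bytes, giving
 * 8 * 1000 = 8000 bytes of content, which is then rounded up to two pages:
 * bpf_map_mmap_sz() returns 8192.
 */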
1525
1526 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1527 {
1528         char map_name[BPF_OBJ_NAME_LEN], *p;
1529         int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1530
1531         /* This is one of the more confusing parts of libbpf for various
1532          * reasons, some of which are historical. The original idea for naming
1533          * internal maps was to include as much of the BPF object name prefix as
1534          * possible, so that it can be distinguished from similar internal
1535          * maps of a different BPF object.
1536          * As an example, let's say we have bpf_object named 'my_object_name'
1537          * and internal map corresponding to '.rodata' ELF section. The final
1538          * map name advertised to user and to the kernel will be
1539          * 'my_objec.rodata', taking first 8 characters of object name and
1540          * entire 7 characters of '.rodata'.
1541          * Somewhat confusingly, if internal map ELF section name is shorter
1542          * than 7 characters, e.g., '.bss', we still reserve 7 characters
1543          * for the suffix, even though we only have 4 actual characters, and
1544          * resulting map will be called 'my_objec.bss', not even using all 15
1545          * characters allowed by the kernel. Oh well, at least the truncated
1546          * object name is somewhat consistent in this case. But if the map
1547          * name is '.kconfig', we'll still have entirety of '.kconfig' added
1548          * (8 chars) and thus will be left with only first 7 characters of the
1549          * object name ('my_obje'). Happy guessing, user, that the final map
1550          * name will be "my_obje.kconfig".
1551          * Now, with libbpf starting to support arbitrarily named .rodata.*
1552          * and .data.* data sections, it's possible that ELF section name is
1553          * longer than allowed 15 chars, so we now need to be careful to take
1554          * only up to 15 first characters of ELF name, taking no BPF object
1555          * name characters at all. So '.rodata.abracadabra' will result in
1556          * '.rodata.abracad' kernel and user-visible name.
1557          * We need to keep this convoluted logic intact for .data, .bss and
1558          * .rodata maps, but for new custom .data.custom and .rodata.custom
1559          * maps we use their ELF names as is, not prepending bpf_object name
1560          * in front. We still need to truncate them to 15 characters for the
1561          * kernel. Full name can be recovered for such maps by using DATASEC
1562          * BTF type associated with such map's value type, though.
1563          */
1564         if (sfx_len >= BPF_OBJ_NAME_LEN)
1565                 sfx_len = BPF_OBJ_NAME_LEN - 1;
1566
1567         /* if there are two or more dots in map name, it's a custom dot map */
1568         if (strchr(real_name + 1, '.') != NULL)
1569                 pfx_len = 0;
1570         else
1571                 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1572
1573         snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1574                  sfx_len, real_name);
1575
1576         /* sanitise map name to characters allowed by kernel */
1577         for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1578                 if (!isalnum(*p) && *p != '_' && *p != '.')
1579                         *p = '_';
1580
1581         return strdup(map_name);
1582 }
1583
1584 static int
1585 bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map);
1586
1587 static int
1588 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1589                               const char *real_name, int sec_idx, void *data, size_t data_sz)
1590 {
1591         struct bpf_map_def *def;
1592         struct bpf_map *map;
1593         int err;
1594
1595         map = bpf_object__add_map(obj);
1596         if (IS_ERR(map))
1597                 return PTR_ERR(map);
1598
1599         map->libbpf_type = type;
1600         map->sec_idx = sec_idx;
1601         map->sec_offset = 0;
1602         map->real_name = strdup(real_name);
1603         map->name = internal_map_name(obj, real_name);
1604         if (!map->real_name || !map->name) {
1605                 zfree(&map->real_name);
1606                 zfree(&map->name);
1607                 return -ENOMEM;
1608         }
1609
1610         def = &map->def;
1611         def->type = BPF_MAP_TYPE_ARRAY;
1612         def->key_size = sizeof(int);
1613         def->value_size = data_sz;
1614         def->max_entries = 1;
1615         def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1616                          ? BPF_F_RDONLY_PROG : 0;
1617         def->map_flags |= BPF_F_MMAPABLE;
1618
1619         pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1620                  map->name, map->sec_idx, map->sec_offset, def->map_flags);
1621
1622         map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
1623                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1624         if (map->mmaped == MAP_FAILED) {
1625                 err = -errno;
1626                 map->mmaped = NULL;
1627                 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1628                         map->name, err);
1629                 zfree(&map->real_name);
1630                 zfree(&map->name);
1631                 return err;
1632         }
1633
1634         /* failures are fine because of maps like .rodata.str1.1 */
1635         (void) bpf_map_find_btf_info(obj, map);
1636
1637         if (data)
1638                 memcpy(map->mmaped, data, data_sz);
1639
1640         pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1641         return 0;
1642 }
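
/* Illustrative: a hypothetical 16-byte .rodata section becomes a
 * BPF_MAP_TYPE_ARRAY with key_size == sizeof(int), value_size == 16,
 * max_entries == 1 and map_flags == BPF_F_RDONLY_PROG | BPF_F_MMAPABLE,
 * with the section contents copied into the mmap()'ed buffer.
 */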
1643
1644 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1645 {
1646         struct elf_sec_desc *sec_desc;
1647         const char *sec_name;
1648         int err = 0, sec_idx;
1649
1650         /*
1651          * Populate obj->maps with libbpf internal maps.
1652          */
1653         for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1654                 sec_desc = &obj->efile.secs[sec_idx];
1655
1656                 /* Skip recognized sections with size 0. */
1657                 if (!sec_desc->data || sec_desc->data->d_size == 0)
1658                         continue;
1659
1660                 switch (sec_desc->sec_type) {
1661                 case SEC_DATA:
1662                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1663                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1664                                                             sec_name, sec_idx,
1665                                                             sec_desc->data->d_buf,
1666                                                             sec_desc->data->d_size);
1667                         break;
1668                 case SEC_RODATA:
1669                         obj->has_rodata = true;
1670                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1671                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1672                                                             sec_name, sec_idx,
1673                                                             sec_desc->data->d_buf,
1674                                                             sec_desc->data->d_size);
1675                         break;
1676                 case SEC_BSS:
1677                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1678                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1679                                                             sec_name, sec_idx,
1680                                                             NULL,
1681                                                             sec_desc->data->d_size);
1682                         break;
1683                 default:
1684                         /* skip */
1685                         break;
1686                 }
1687                 if (err)
1688                         return err;
1689         }
1690         return 0;
1691 }
1692
1694 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1695                                                const void *name)
1696 {
1697         int i;
1698
1699         for (i = 0; i < obj->nr_extern; i++) {
1700                 if (strcmp(obj->externs[i].name, name) == 0)
1701                         return &obj->externs[i];
1702         }
1703         return NULL;
1704 }
1705
1706 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1707                               char value)
1708 {
1709         switch (ext->kcfg.type) {
1710         case KCFG_BOOL:
1711                 if (value == 'm') {
1712                         pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
1713                                 ext->name, value);
1714                         return -EINVAL;
1715                 }
1716                 *(bool *)ext_val = value == 'y' ? true : false;
1717                 break;
1718         case KCFG_TRISTATE:
1719                 if (value == 'y')
1720                         *(enum libbpf_tristate *)ext_val = TRI_YES;
1721                 else if (value == 'm')
1722                         *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1723                 else /* value == 'n' */
1724                         *(enum libbpf_tristate *)ext_val = TRI_NO;
1725                 break;
1726         case KCFG_CHAR:
1727                 *(char *)ext_val = value;
1728                 break;
1729         case KCFG_UNKNOWN:
1730         case KCFG_INT:
1731         case KCFG_CHAR_ARR:
1732         default:
1733                 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
1734                         ext->name, value);
1735                 return -EINVAL;
1736         }
1737         ext->is_set = true;
1738         return 0;
1739 }
1740
1741 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1742                               const char *value)
1743 {
1744         size_t len;
1745
1746         if (ext->kcfg.type != KCFG_CHAR_ARR) {
1747                 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
1748                         ext->name, value);
1749                 return -EINVAL;
1750         }
1751
1752         len = strlen(value);
1753         if (value[len - 1] != '"') {
1754                 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1755                         ext->name, value);
1756                 return -EINVAL;
1757         }
1758
1759         /* strip quotes */
1760         len -= 2;
1761         if (len >= ext->kcfg.sz) {
1762                 pr_warn("extern (kcfg) '%s': long string '%s' (%zu bytes) truncated to %d bytes\n",
1763                         ext->name, value, len, ext->kcfg.sz - 1);
1764                 len = ext->kcfg.sz - 1;
1765         }
1766         memcpy(ext_val, value + 1, len);
1767         ext_val[len] = '\0';
1768         ext->is_set = true;
1769         return 0;
1770 }
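
/* Illustrative walk-through (hypothetical input): for a Kconfig value
 * '"hello"' (7 characters including both quotes), len starts at 7, the
 * closing-quote check passes, len is reduced to 5 after stripping the
 * quotes, and ext_val ends up holding the NUL-terminated string "hello".
 */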
1771
1772 static int parse_u64(const char *value, __u64 *res)
1773 {
1774         char *value_end;
1775         int err;
1776
1777         errno = 0;
1778         *res = strtoull(value, &value_end, 0);
1779         if (errno) {
1780                 err = -errno;
1781                 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1782                 return err;
1783         }
1784         if (*value_end) {
1785                 pr_warn("failed to parse '%s' as integer completely\n", value);
1786                 return -EINVAL;
1787         }
1788         return 0;
1789 }
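
/* Usage sketch (illustrative): strtoull() is called with base 0, so decimal,
 * hex and octal notations are all accepted:
 *
 *   __u64 v;
 *
 *   parse_u64("250", &v);    // v == 250
 *   parse_u64("0x10", &v);   // v == 16
 *   parse_u64("0750", &v);   // v == 488 (octal)
 *   parse_u64("12ab", &v);   // fails: trailing non-numeric characters
 */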
1790
1791 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1792 {
1793         int bit_sz = ext->kcfg.sz * 8;
1794
1795         if (ext->kcfg.sz == 8)
1796                 return true;
1797
1798         /* Validate that the value stored in u64 fits in an integer of
1799          * `ext->kcfg.sz` bytes without any loss of information. If the target integer
1800          * is signed, we rely on the following limits of integer type of
1801          * Y bits and subsequent transformation:
1802          *
1803          *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
1804          *            0 <= X + 2^(Y-1) <= 2^Y - 1
1805          *            0 <= X + 2^(Y-1) <  2^Y
1806          *
1807          *  For unsigned target integer, check that all the (64 - Y) bits are
1808          *  zero.
1809          */
1810         if (ext->kcfg.is_signed)
1811                 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1812         else
1813                 return (v >> bit_sz) == 0;
1814 }
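
/* Worked example (illustrative): for a signed 1-byte target, bit_sz == 8 and
 * the check is v + 128 < 256 with v interpreted as a two's-complement __u64.
 * v == 0xffffffffffffff80 (i.e. -128) gives -128 + 128 == 0, which is in
 * range, while v == 128 gives 128 + 128 == 256, which is rejected. For an
 * unsigned 1-byte target, any v with bits set above bit 7 is rejected.
 */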
1815
1816 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1817                               __u64 value)
1818 {
1819         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
1820             ext->kcfg.type != KCFG_BOOL) {
1821                 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
1822                         ext->name, (unsigned long long)value);
1823                 return -EINVAL;
1824         }
1825         if (ext->kcfg.type == KCFG_BOOL && value > 1) {
1826                 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
1827                         ext->name, (unsigned long long)value);
1828                 return -EINVAL;
1829         }
1831         if (!is_kcfg_value_in_range(ext, value)) {
1832                 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
1833                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1834                 return -ERANGE;
1835         }
1836         switch (ext->kcfg.sz) {
1837         case 1: *(__u8 *)ext_val = value; break;
1838         case 2: *(__u16 *)ext_val = value; break;
1839         case 4: *(__u32 *)ext_val = value; break;
1840         case 8: *(__u64 *)ext_val = value; break;
1841         default:
1842                 return -EINVAL;
1843         }
1844         ext->is_set = true;
1845         return 0;
1846 }
1847
1848 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1849                                             char *buf, void *data)
1850 {
1851         struct extern_desc *ext;
1852         char *sep, *value;
1853         int len, err = 0;
1854         void *ext_val;
1855         __u64 num;
1856
1857         if (!str_has_pfx(buf, "CONFIG_"))
1858                 return 0;
1859
1860         sep = strchr(buf, '=');
1861         if (!sep) {
1862                 pr_warn("failed to parse '%s': no separator\n", buf);
1863                 return -EINVAL;
1864         }
1865
1866         /* Trim trailing '\n' */
1867         len = strlen(buf);
1868         if (buf[len - 1] == '\n')
1869                 buf[len - 1] = '\0';
1870         /* Split on '=' and ensure that a value is present. */
1871         *sep = '\0';
1872         if (!sep[1]) {
1873                 *sep = '=';
1874                 pr_warn("failed to parse '%s': no value\n", buf);
1875                 return -EINVAL;
1876         }
1877
1878         ext = find_extern_by_name(obj, buf);
1879         if (!ext || ext->is_set)
1880                 return 0;
1881
1882         ext_val = data + ext->kcfg.data_off;
1883         value = sep + 1;
1884
1885         switch (*value) {
1886         case 'y': case 'n': case 'm':
1887                 err = set_kcfg_value_tri(ext, ext_val, *value);
1888                 break;
1889         case '"':
1890                 err = set_kcfg_value_str(ext, ext_val, value);
1891                 break;
1892         default:
1893                 /* assume integer */
1894                 err = parse_u64(value, &num);
1895                 if (err) {
1896                         pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
1897                         return err;
1898                 }
1899                 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1900                         pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
1901                         return -EINVAL;
1902                 }
1903                 err = set_kcfg_value_num(ext, ext_val, num);
1904                 break;
1905         }
1906         if (err)
1907                 return err;
1908         pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
1909         return 0;
1910 }
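
/* Illustrative input lines (assuming matching externs were declared in the
 * BPF program):
 *
 *   CONFIG_BPF=y                      -> bool extern set to true
 *   CONFIG_MODULES=m                  -> enum libbpf_tristate extern set to TRI_MODULE
 *   CONFIG_DEFAULT_HOSTNAME="(none)"  -> char array extern set to "(none)"
 *   CONFIG_HZ=250                     -> integer extern set to 250
 */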
1911
1912 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1913 {
1914         char buf[PATH_MAX];
1915         struct utsname uts;
1916         int len, err = 0;
1917         gzFile file;
1918
1919         uname(&uts);
1920         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1921         if (len < 0)
1922                 return -EINVAL;
1923         else if (len >= PATH_MAX)
1924                 return -ENAMETOOLONG;
1925
1926         /* gzopen also accepts uncompressed files. */
1927         file = gzopen(buf, "r");
1928         if (!file)
1929                 file = gzopen("/proc/config.gz", "r");
1930
1931         if (!file) {
1932                 pr_warn("failed to open system Kconfig\n");
1933                 return -ENOENT;
1934         }
1935
1936         while (gzgets(file, buf, sizeof(buf))) {
1937                 err = bpf_object__process_kconfig_line(obj, buf, data);
1938                 if (err) {
1939                         pr_warn("error parsing system Kconfig line '%s': %d\n",
1940                                 buf, err);
1941                         goto out;
1942                 }
1943         }
1944
1945 out:
1946         gzclose(file);
1947         return err;
1948 }
1949
1950 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1951                                         const char *config, void *data)
1952 {
1953         char buf[PATH_MAX];
1954         int err = 0;
1955         FILE *file;
1956
1957         file = fmemopen((void *)config, strlen(config), "r");
1958         if (!file) {
1959                 err = -errno;
1960                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
1961                 return err;
1962         }
1963
1964         while (fgets(buf, sizeof(buf), file)) {
1965                 err = bpf_object__process_kconfig_line(obj, buf, data);
1966                 if (err) {
1967                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1968                                 buf, err);
1969                         break;
1970                 }
1971         }
1972
1973         fclose(file);
1974         return err;
1975 }
1976
1977 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
1978 {
1979         struct extern_desc *last_ext = NULL, *ext;
1980         size_t map_sz;
1981         int i, err;
1982
1983         for (i = 0; i < obj->nr_extern; i++) {
1984                 ext = &obj->externs[i];
1985                 if (ext->type == EXT_KCFG)
1986                         last_ext = ext;
1987         }
1988
1989         if (!last_ext)
1990                 return 0;
1991
1992         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
1993         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
1994                                             ".kconfig", obj->efile.symbols_shndx,
1995                                             NULL, map_sz);
1996         if (err)
1997                 return err;
1998
1999         obj->kconfig_map_idx = obj->nr_maps - 1;
2000
2001         return 0;
2002 }
2003
2004 const struct btf_type *
2005 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2006 {
2007         const struct btf_type *t = btf__type_by_id(btf, id);
2008
2009         if (res_id)
2010                 *res_id = id;
2011
2012         while (btf_is_mod(t) || btf_is_typedef(t)) {
2013                 if (res_id)
2014                         *res_id = t->type;
2015                 t = btf__type_by_id(btf, t->type);
2016         }
2017
2018         return t;
2019 }
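
/* Illustrative: for a BTF chain CONST -> VOLATILE -> TYPEDEF(u32) -> INT,
 * skip_mods_and_typedefs() walks past the CONST, VOLATILE and TYPEDEF
 * entries and returns the underlying INT type, with *res_id (if non-NULL)
 * set to the INT's type ID.
 */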
2020
2021 static const struct btf_type *
2022 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2023 {
2024         const struct btf_type *t;
2025
2026         t = skip_mods_and_typedefs(btf, id, NULL);
2027         if (!btf_is_ptr(t))
2028                 return NULL;
2029
2030         t = skip_mods_and_typedefs(btf, t->type, res_id);
2031
2032         return btf_is_func_proto(t) ? t : NULL;
2033 }
2034
2035 static const char *__btf_kind_str(__u16 kind)
2036 {
2037         switch (kind) {
2038         case BTF_KIND_UNKN: return "void";
2039         case BTF_KIND_INT: return "int";
2040         case BTF_KIND_PTR: return "ptr";
2041         case BTF_KIND_ARRAY: return "array";
2042         case BTF_KIND_STRUCT: return "struct";
2043         case BTF_KIND_UNION: return "union";
2044         case BTF_KIND_ENUM: return "enum";
2045         case BTF_KIND_FWD: return "fwd";
2046         case BTF_KIND_TYPEDEF: return "typedef";
2047         case BTF_KIND_VOLATILE: return "volatile";
2048         case BTF_KIND_CONST: return "const";
2049         case BTF_KIND_RESTRICT: return "restrict";
2050         case BTF_KIND_FUNC: return "func";
2051         case BTF_KIND_FUNC_PROTO: return "func_proto";
2052         case BTF_KIND_VAR: return "var";
2053         case BTF_KIND_DATASEC: return "datasec";
2054         case BTF_KIND_FLOAT: return "float";
2055         case BTF_KIND_DECL_TAG: return "decl_tag";
2056         case BTF_KIND_TYPE_TAG: return "type_tag";
2057         case BTF_KIND_ENUM64: return "enum64";
2058         default: return "unknown";
2059         }
2060 }
2061
2062 const char *btf_kind_str(const struct btf_type *t)
2063 {
2064         return __btf_kind_str(btf_kind(t));
2065 }
2066
2067 /*
2068  * Fetch integer attribute of BTF map definition. Such attributes are
2069  * represented using a pointer to an array, in which the dimensionality of the
2070  * array encodes the specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2071  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2072  * type definition, while using only sizeof(void *) space in ELF data section.
2073  */
2074 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2075                               const struct btf_member *m, __u32 *res)
2076 {
2077         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2078         const char *name = btf__name_by_offset(btf, m->name_off);
2079         const struct btf_array *arr_info;
2080         const struct btf_type *arr_t;
2081
2082         if (!btf_is_ptr(t)) {
2083                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2084                         map_name, name, btf_kind_str(t));
2085                 return false;
2086         }
2087
2088         arr_t = btf__type_by_id(btf, t->type);
2089         if (!arr_t) {
2090                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2091                         map_name, name, t->type);
2092                 return false;
2093         }
2094         if (!btf_is_array(arr_t)) {
2095                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2096                         map_name, name, btf_kind_str(arr_t));
2097                 return false;
2098         }
2099         arr_info = btf_array(arr_t);
2100         *res = arr_info->nelems;
2101         return true;
2102 }
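
/* A minimal sketch of what this parses, as it would be written in BPF C code
 * (map name is hypothetical; __uint() from bpf_helpers.h expands to exactly
 * this pointer-to-array form):
 *
 *   struct {
 *           int (*type)[BPF_MAP_TYPE_ARRAY];   // __uint(type, BPF_MAP_TYPE_ARRAY)
 *           int (*max_entries)[1024];          // __uint(max_entries, 1024)
 *   } my_map SEC(".maps");
 *
 * get_map_field_int() recovers type == BPF_MAP_TYPE_ARRAY and
 * max_entries == 1024 from the array dimensions recorded in BTF.
 */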
2103
2104 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2105 {
2106         int len;
2107
2108         len = snprintf(buf, buf_sz, "%s/%s", path, name);
2109         if (len < 0)
2110                 return -EINVAL;
2111         if (len >= buf_sz)
2112                 return -ENAMETOOLONG;
2113
2114         return 0;
2115 }
2116
2117 static int build_map_pin_path(struct bpf_map *map, const char *path)
2118 {
2119         char buf[PATH_MAX];
2120         int err;
2121
2122         if (!path)
2123                 path = "/sys/fs/bpf";
2124
2125         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2126         if (err)
2127                 return err;
2128
2129         return bpf_map__set_pin_path(map, buf);
2130 }
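
/* Illustrative: with the default pin root, a hypothetical map named
 * "my_hash" gets the pin path "/sys/fs/bpf/my_hash"; with a custom
 * pin_root_path of "/sys/fs/bpf/myapp" it becomes
 * "/sys/fs/bpf/myapp/my_hash".
 */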
2131
2132 /* should match definition in bpf_helpers.h */
2133 enum libbpf_pin_type {
2134         LIBBPF_PIN_NONE,
2135         /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2136         LIBBPF_PIN_BY_NAME,
2137 };
2138
2139 int parse_btf_map_def(const char *map_name, struct btf *btf,
2140                       const struct btf_type *def_t, bool strict,
2141                       struct btf_map_def *map_def, struct btf_map_def *inner_def)
2142 {
2143         const struct btf_type *t;
2144         const struct btf_member *m;
2145         bool is_inner = inner_def == NULL;
2146         int vlen, i;
2147
2148         vlen = btf_vlen(def_t);
2149         m = btf_members(def_t);
2150         for (i = 0; i < vlen; i++, m++) {
2151                 const char *name = btf__name_by_offset(btf, m->name_off);
2152
2153                 if (!name) {
2154                         pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2155                         return -EINVAL;
2156                 }
2157                 if (strcmp(name, "type") == 0) {
2158                         if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2159                                 return -EINVAL;
2160                         map_def->parts |= MAP_DEF_MAP_TYPE;
2161                 } else if (strcmp(name, "max_entries") == 0) {
2162                         if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2163                                 return -EINVAL;
2164                         map_def->parts |= MAP_DEF_MAX_ENTRIES;
2165                 } else if (strcmp(name, "map_flags") == 0) {
2166                         if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2167                                 return -EINVAL;
2168                         map_def->parts |= MAP_DEF_MAP_FLAGS;
2169                 } else if (strcmp(name, "numa_node") == 0) {
2170                         if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2171                                 return -EINVAL;
2172                         map_def->parts |= MAP_DEF_NUMA_NODE;
2173                 } else if (strcmp(name, "key_size") == 0) {
2174                         __u32 sz;
2175
2176                         if (!get_map_field_int(map_name, btf, m, &sz))
2177                                 return -EINVAL;
2178                         if (map_def->key_size && map_def->key_size != sz) {
2179                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
2180                                         map_name, map_def->key_size, sz);
2181                                 return -EINVAL;
2182                         }
2183                         map_def->key_size = sz;
2184                         map_def->parts |= MAP_DEF_KEY_SIZE;
2185                 } else if (strcmp(name, "key") == 0) {
2186                         __s64 sz;
2187
2188                         t = btf__type_by_id(btf, m->type);
2189                         if (!t) {
2190                                 pr_warn("map '%s': key type [%d] not found.\n",
2191                                         map_name, m->type);
2192                                 return -EINVAL;
2193                         }
2194                         if (!btf_is_ptr(t)) {
2195                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2196                                         map_name, btf_kind_str(t));
2197                                 return -EINVAL;
2198                         }
2199                         sz = btf__resolve_size(btf, t->type);
2200                         if (sz < 0) {
2201                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2202                                         map_name, t->type, (ssize_t)sz);
2203                                 return sz;
2204                         }
2205                         if (map_def->key_size && map_def->key_size != sz) {
2206                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2207                                         map_name, map_def->key_size, (ssize_t)sz);
2208                                 return -EINVAL;
2209                         }
2210                         map_def->key_size = sz;
2211                         map_def->key_type_id = t->type;
2212                         map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2213                 } else if (strcmp(name, "value_size") == 0) {
2214                         __u32 sz;
2215
2216                         if (!get_map_field_int(map_name, btf, m, &sz))
2217                                 return -EINVAL;
2218                         if (map_def->value_size && map_def->value_size != sz) {
2219                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2220                                         map_name, map_def->value_size, sz);
2221                                 return -EINVAL;
2222                         }
2223                         map_def->value_size = sz;
2224                         map_def->parts |= MAP_DEF_VALUE_SIZE;
2225                 } else if (strcmp(name, "value") == 0) {
2226                         __s64 sz;
2227
2228                         t = btf__type_by_id(btf, m->type);
2229                         if (!t) {
2230                                 pr_warn("map '%s': value type [%d] not found.\n",
2231                                         map_name, m->type);
2232                                 return -EINVAL;
2233                         }
2234                         if (!btf_is_ptr(t)) {
2235                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2236                                         map_name, btf_kind_str(t));
2237                                 return -EINVAL;
2238                         }
2239                         sz = btf__resolve_size(btf, t->type);
2240                         if (sz < 0) {
2241                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2242                                         map_name, t->type, (ssize_t)sz);
2243                                 return sz;
2244                         }
2245                         if (map_def->value_size && map_def->value_size != sz) {
2246                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2247                                         map_name, map_def->value_size, (ssize_t)sz);
2248                                 return -EINVAL;
2249                         }
2250                         map_def->value_size = sz;
2251                         map_def->value_type_id = t->type;
2252                         map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2253                 } else if (strcmp(name, "values") == 0) {
2255                         bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2256                         bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2257                         const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2258                         char inner_map_name[128];
2259                         int err;
2260
2261                         if (is_inner) {
2262                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2263                                         map_name);
2264                                 return -ENOTSUP;
2265                         }
2266                         if (i != vlen - 1) {
2267                                 pr_warn("map '%s': '%s' member should be last.\n",
2268                                         map_name, name);
2269                                 return -EINVAL;
2270                         }
2271                         if (!is_map_in_map && !is_prog_array) {
2272                                 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2273                                         map_name);
2274                                 return -ENOTSUP;
2275                         }
2276                         if (map_def->value_size && map_def->value_size != 4) {
2277                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2278                                         map_name, map_def->value_size);
2279                                 return -EINVAL;
2280                         }
2281                         map_def->value_size = 4;
2282                         t = btf__type_by_id(btf, m->type);
2283                         if (!t) {
2284                                 pr_warn("map '%s': %s type [%d] not found.\n",
2285                                         map_name, desc, m->type);
2286                                 return -EINVAL;
2287                         }
2288                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2289                                 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2290                                         map_name, desc);
2291                                 return -EINVAL;
2292                         }
2293                         t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2294                         if (!btf_is_ptr(t)) {
2295                                 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2296                                         map_name, desc, btf_kind_str(t));
2297                                 return -EINVAL;
2298                         }
2299                         t = skip_mods_and_typedefs(btf, t->type, NULL);
2300                         if (is_prog_array) {
2301                                 if (!btf_is_func_proto(t)) {
2302                                         pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2303                                                 map_name, btf_kind_str(t));
2304                                         return -EINVAL;
2305                                 }
2306                                 continue;
2307                         }
2308                         if (!btf_is_struct(t)) {
2309                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2310                                         map_name, btf_kind_str(t));
2311                                 return -EINVAL;
2312                         }
2313
2314                         snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2315                         err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2316                         if (err)
2317                                 return err;
2318
2319                         map_def->parts |= MAP_DEF_INNER_MAP;
2320                 } else if (strcmp(name, "pinning") == 0) {
2321                         __u32 val;
2322
2323                         if (is_inner) {
2324                                 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2325                                 return -EINVAL;
2326                         }
2327                         if (!get_map_field_int(map_name, btf, m, &val))
2328                                 return -EINVAL;
2329                         if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2330                                 pr_warn("map '%s': invalid pinning value %u.\n",
2331                                         map_name, val);
2332                                 return -EINVAL;
2333                         }
2334                         map_def->pinning = val;
2335                         map_def->parts |= MAP_DEF_PINNING;
2336                 } else if (strcmp(name, "map_extra") == 0) {
2337                         __u32 map_extra;
2338
2339                         if (!get_map_field_int(map_name, btf, m, &map_extra))
2340                                 return -EINVAL;
2341                         map_def->map_extra = map_extra;
2342                         map_def->parts |= MAP_DEF_MAP_EXTRA;
2343                 } else {
2344                         if (strict) {
2345                                 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2346                                 return -ENOTSUP;
2347                         }
2348                         pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2349                 }
2350         }
2351
2352         if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2353                 pr_warn("map '%s': map type isn't specified.\n", map_name);
2354                 return -EINVAL;
2355         }
2356
2357         return 0;
2358 }
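
/* Illustrative BTF map definition exercising the fields parsed above (map
 * and value type names are hypothetical; __uint()/__type() are the
 * bpf_helpers.h convenience macros):
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_HASH);
 *           __uint(max_entries, 4096);
 *           __type(key, __u32);
 *           __type(value, struct my_value);
 *           __uint(pinning, LIBBPF_PIN_BY_NAME);
 *   } my_hash SEC(".maps");
 *
 * Parsing this sets MAP_DEF_MAP_TYPE, MAP_DEF_MAX_ENTRIES,
 * MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE, MAP_DEF_VALUE_SIZE |
 * MAP_DEF_VALUE_TYPE and MAP_DEF_PINNING in map_def->parts.
 */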
2359
2360 static size_t adjust_ringbuf_sz(size_t sz)
2361 {
2362         __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2363         __u32 mul;
2364
2365         /* if the user forgot to set any size, make sure they see an error */
2366         if (sz == 0)
2367                 return 0;
2368         /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2369          * a power-of-2 multiple of the kernel's page size. If the user diligently
2370          * satisfied these conditions, pass the size through.
2371          */
2372         if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2373                 return sz;
2374
2375         /* Otherwise find closest (page_sz * power_of_2) product bigger than
2376          * user-set size to satisfy both user size request and kernel
2377          * requirements and substitute correct max_entries for map creation.
2378          */
2379         for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2380                 if (mul * page_sz > sz)
2381                         return mul * page_sz;
2382         }
2383
2384         /* if it's impossible to satisfy the conditions (i.e., user size is
2385          * very close to UINT_MAX but is not a power-of-2 multiple of
2386          * page_size) then just return original size and let kernel reject it
2387          */
2388         return sz;
2389 }
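
/* Worked examples (illustrative, assuming 4096-byte pages):
 *
 *   adjust_ringbuf_sz(4096)  == 4096   (already 1 page, passed through)
 *   adjust_ringbuf_sz(5000)  == 8192   (rounded up to 2 pages)
 *   adjust_ringbuf_sz(12288) == 16384  (3 pages is not a power-of-2
 *                                       multiple, so rounded up to 4 pages)
 */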
2390
2391 static bool map_is_ringbuf(const struct bpf_map *map)
2392 {
2393         return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2394                map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2395 }
2396
2397 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2398 {
2399         map->def.type = def->map_type;
2400         map->def.key_size = def->key_size;
2401         map->def.value_size = def->value_size;
2402         map->def.max_entries = def->max_entries;
2403         map->def.map_flags = def->map_flags;
2404         map->map_extra = def->map_extra;
2405
2406         map->numa_node = def->numa_node;
2407         map->btf_key_type_id = def->key_type_id;
2408         map->btf_value_type_id = def->value_type_id;
2409
2410         /* auto-adjust BPF ringbuf map max_entries to be a power-of-2 multiple of page size */
2411         if (map_is_ringbuf(map))
2412                 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2413
2414         if (def->parts & MAP_DEF_MAP_TYPE)
2415                 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2416
2417         if (def->parts & MAP_DEF_KEY_TYPE)
2418                 pr_debug("map '%s': found key [%u], sz = %u.\n",
2419                          map->name, def->key_type_id, def->key_size);
2420         else if (def->parts & MAP_DEF_KEY_SIZE)
2421                 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2422
2423         if (def->parts & MAP_DEF_VALUE_TYPE)
2424                 pr_debug("map '%s': found value [%u], sz = %u.\n",
2425                          map->name, def->value_type_id, def->value_size);
2426         else if (def->parts & MAP_DEF_VALUE_SIZE)
2427                 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2428
2429         if (def->parts & MAP_DEF_MAX_ENTRIES)
2430                 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2431         if (def->parts & MAP_DEF_MAP_FLAGS)
2432                 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2433         if (def->parts & MAP_DEF_MAP_EXTRA)
2434                 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2435                          (unsigned long long)def->map_extra);
2436         if (def->parts & MAP_DEF_PINNING)
2437                 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2438         if (def->parts & MAP_DEF_NUMA_NODE)
2439                 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2440
2441         if (def->parts & MAP_DEF_INNER_MAP)
2442                 pr_debug("map '%s': found inner map definition.\n", map->name);
2443 }
2444
2445 static const char *btf_var_linkage_str(__u32 linkage)
2446 {
2447         switch (linkage) {
2448         case BTF_VAR_STATIC: return "static";
2449         case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2450         case BTF_VAR_GLOBAL_EXTERN: return "extern";
2451         default: return "unknown";
2452         }
2453 }
2454
2455 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2456                                          const struct btf_type *sec,
2457                                          int var_idx, int sec_idx,
2458                                          const Elf_Data *data, bool strict,
2459                                          const char *pin_root_path)
2460 {
2461         struct btf_map_def map_def = {}, inner_def = {};
2462         const struct btf_type *var, *def;
2463         const struct btf_var_secinfo *vi;
2464         const struct btf_var *var_extra;
2465         const char *map_name;
2466         struct bpf_map *map;
2467         int err;
2468
2469         vi = btf_var_secinfos(sec) + var_idx;
2470         var = btf__type_by_id(obj->btf, vi->type);
2471         var_extra = btf_var(var);
2472         map_name = btf__name_by_offset(obj->btf, var->name_off);
2473
2474         if (map_name == NULL || map_name[0] == '\0') {
2475                 pr_warn("map #%d: empty name.\n", var_idx);
2476                 return -EINVAL;
2477         }
2478         if ((__u64)vi->offset + vi->size > data->d_size) {
2479                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2480                 return -EINVAL;
2481         }
2482         if (!btf_is_var(var)) {
2483                 pr_warn("map '%s': unexpected var kind %s.\n",
2484                         map_name, btf_kind_str(var));
2485                 return -EINVAL;
2486         }
2487         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2488                 pr_warn("map '%s': unsupported map linkage %s.\n",
2489                         map_name, btf_var_linkage_str(var_extra->linkage));
2490                 return -EOPNOTSUPP;
2491         }
2492
2493         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2494         if (!btf_is_struct(def)) {
2495                 pr_warn("map '%s': unexpected def kind %s.\n",
2496                         map_name, btf_kind_str(def));
2497                 return -EINVAL;
2498         }
2499         if (def->size > vi->size) {
2500                 pr_warn("map '%s': invalid def size.\n", map_name);
2501                 return -EINVAL;
2502         }
2503
2504         map = bpf_object__add_map(obj);
2505         if (IS_ERR(map))
2506                 return PTR_ERR(map);
2507         map->name = strdup(map_name);
2508         if (!map->name) {
2509                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2510                 return -ENOMEM;
2511         }
2512         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2513         map->def.type = BPF_MAP_TYPE_UNSPEC;
2514         map->sec_idx = sec_idx;
2515         map->sec_offset = vi->offset;
2516         map->btf_var_idx = var_idx;
2517         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2518                  map_name, map->sec_idx, map->sec_offset);
2519
2520         err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2521         if (err)
2522                 return err;
2523
2524         fill_map_from_def(map, &map_def);
2525
2526         if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2527                 err = build_map_pin_path(map, pin_root_path);
2528                 if (err) {
2529                         pr_warn("map '%s': couldn't build pin path.\n", map->name);
2530                         return err;
2531                 }
2532         }
2533
2534         if (map_def.parts & MAP_DEF_INNER_MAP) {
2535                 map->inner_map = calloc(1, sizeof(*map->inner_map));
2536                 if (!map->inner_map)
2537                         return -ENOMEM;
2538                 map->inner_map->fd = -1;
2539                 map->inner_map->sec_idx = sec_idx;
2540                 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2541                 if (!map->inner_map->name)
2542                         return -ENOMEM;
2543                 sprintf(map->inner_map->name, "%s.inner", map_name);
2544
2545                 fill_map_from_def(map->inner_map, &inner_def);
2546         }
2547
2548         err = bpf_map_find_btf_info(obj, map);
2549         if (err)
2550                 return err;
2551
2552         return 0;
2553 }
2554
2555 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2556                                           const char *pin_root_path)
2557 {
2558         const struct btf_type *sec = NULL;
2559         int nr_types, i, vlen, err;
2560         const struct btf_type *t;
2561         const char *name;
2562         Elf_Data *data;
2563         Elf_Scn *scn;
2564
2565         if (obj->efile.btf_maps_shndx < 0)
2566                 return 0;
2567
2568         scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2569         data = elf_sec_data(obj, scn);
2570         if (!scn || !data) {
2571                 pr_warn("elf: failed to get %s map definitions for %s\n",
2572                         MAPS_ELF_SEC, obj->path);
2573                 return -EINVAL;
2574         }
2575
2576         nr_types = btf__type_cnt(obj->btf);
2577         for (i = 1; i < nr_types; i++) {
2578                 t = btf__type_by_id(obj->btf, i);
2579                 if (!btf_is_datasec(t))
2580                         continue;
2581                 name = btf__name_by_offset(obj->btf, t->name_off);
2582                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2583                         sec = t;
2584                         obj->efile.btf_maps_sec_btf_id = i;
2585                         break;
2586                 }
2587         }
2588
2589         if (!sec) {
2590                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2591                 return -ENOENT;
2592         }
2593
2594         vlen = btf_vlen(sec);
2595         for (i = 0; i < vlen; i++) {
2596                 err = bpf_object__init_user_btf_map(obj, sec, i,
2597                                                     obj->efile.btf_maps_shndx,
2598                                                     data, strict,
2599                                                     pin_root_path);
2600                 if (err)
2601                         return err;
2602         }
2603
2604         return 0;
2605 }
2606
2607 static int bpf_object__init_maps(struct bpf_object *obj,
2608                                  const struct bpf_object_open_opts *opts)
2609 {
2610         const char *pin_root_path;
2611         bool strict;
2612         int err = 0;
2613
2614         strict = !OPTS_GET(opts, relaxed_maps, false);
2615         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2616
2617         err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2618         err = err ?: bpf_object__init_global_data_maps(obj);
2619         err = err ?: bpf_object__init_kconfig_map(obj);
2620         err = err ?: bpf_object__init_struct_ops_maps(obj);
2621
2622         return err;
2623 }
2624
2625 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2626 {
2627         Elf64_Shdr *sh;
2628
2629         sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
2630         if (!sh)
2631                 return false;
2632
2633         return sh->sh_flags & SHF_EXECINSTR;
2634 }
2635
2636 static bool btf_needs_sanitization(struct bpf_object *obj)
2637 {
2638         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2639         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2640         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2641         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2642         bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2643         bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2644         bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2645
2646         return !has_func || !has_datasec || !has_func_global || !has_float ||
2647                !has_decl_tag || !has_type_tag || !has_enum64;
2648 }
2649
2650 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2651 {
2652         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2653         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2654         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2655         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2656         bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2657         bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2658         bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2659         int enum64_placeholder_id = 0;
2660         struct btf_type *t;
2661         int i, j, vlen;
2662
2663         for (i = 1; i < btf__type_cnt(btf); i++) {
2664                 t = (struct btf_type *)btf__type_by_id(btf, i);
2665
2666                 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
2667                         /* replace VAR/DECL_TAG with INT */
2668                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2669                         /*
2670                          * using size = 1 is the safest choice, 4 will be too
2671                          * big and cause kernel BTF validation failure if
2672                          * original variable took less than 4 bytes
2673                          */
2674                         t->size = 1;
2675                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2676                 } else if (!has_datasec && btf_is_datasec(t)) {
2677                         /* replace DATASEC with STRUCT */
2678                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
2679                         struct btf_member *m = btf_members(t);
2680                         struct btf_type *vt;
2681                         char *name;
2682
2683                         name = (char *)btf__name_by_offset(btf, t->name_off);
2684                         while (*name) {
2685                                 if (*name == '.')
2686                                         *name = '_';
2687                                 name++;
2688                         }
2689
2690                         vlen = btf_vlen(t);
2691                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2692                         for (j = 0; j < vlen; j++, v++, m++) {
2693                                 /* order of field assignments is important */
2694                                 m->offset = v->offset * 8;
2695                                 m->type = v->type;
2696                                 /* preserve variable name as member name */
2697                                 vt = (void *)btf__type_by_id(btf, v->type);
2698                                 m->name_off = vt->name_off;
2699                         }
2700                 } else if (!has_func && btf_is_func_proto(t)) {
2701                         /* replace FUNC_PROTO with ENUM */
2702                         vlen = btf_vlen(t);
2703                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2704                         t->size = sizeof(__u32); /* kernel enforced */
2705                 } else if (!has_func && btf_is_func(t)) {
2706                         /* replace FUNC with TYPEDEF */
2707                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2708                 } else if (!has_func_global && btf_is_func(t)) {
2709                         /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2710                         t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2711                 } else if (!has_float && btf_is_float(t)) {
2712                         /* replace FLOAT with an equally-sized empty STRUCT;
2713                          * since C compilers do not accept e.g. "float" as a
2714                          * valid struct name, make it anonymous
2715                          */
2716                         t->name_off = 0;
2717                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
2718                 } else if (!has_type_tag && btf_is_type_tag(t)) {
2719                         /* replace TYPE_TAG with a CONST */
2720                         t->name_off = 0;
2721                         t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
2722                 } else if (!has_enum64 && btf_is_enum(t)) {
2723                         /* clear the kflag */
2724                         t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
2725                 } else if (!has_enum64 && btf_is_enum64(t)) {
2726                         /* replace ENUM64 with a union */
2727                         struct btf_member *m;
2728
2729                         if (enum64_placeholder_id == 0) {
2730                                 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
2731                                 if (enum64_placeholder_id < 0)
2732                                         return enum64_placeholder_id;
2733
2734                                 t = (struct btf_type *)btf__type_by_id(btf, i);
2735                         }
2736
2737                         m = btf_members(t);
2738                         vlen = btf_vlen(t);
2739                         t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
2740                         for (j = 0; j < vlen; j++, m++) {
2741                                 m->type = enum64_placeholder_id;
2742                                 m->offset = 0;
2743                         }
2744                 }
2745         }
2746
2747         return 0;
2748 }
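
/* Illustration (editor's sketch, not compiled code): on a kernel without
 * BTF_KIND_DATASEC support, a datasec such as
 *
 *   DATASEC '.data' size=8 vlen=2
 *           var 'a' off=0 size=4
 *           var 'b' off=4 size=4
 *
 * is rewritten in place by the sanitization loop above as
 *
 *   STRUCT '_data' size=8 vlen=2
 *           member 'a' bits_off=0
 *           member 'b' bits_off=32
 *
 * note the '.' -> '_' name rewrite (C struct names can't contain dots) and
 * the byte-to-bit offset conversion (v->offset * 8).
 */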
2749
2750 static bool libbpf_needs_btf(const struct bpf_object *obj)
2751 {
2752         return obj->efile.btf_maps_shndx >= 0 ||
2753                obj->efile.st_ops_shndx >= 0 ||
2754                obj->nr_extern > 0;
2755 }
2756
2757 static bool kernel_needs_btf(const struct bpf_object *obj)
2758 {
2759         return obj->efile.st_ops_shndx >= 0;
2760 }
2761
2762 static int bpf_object__init_btf(struct bpf_object *obj,
2763                                 Elf_Data *btf_data,
2764                                 Elf_Data *btf_ext_data)
2765 {
2766         int err = -ENOENT;
2767
2768         if (btf_data) {
2769                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2770                 err = libbpf_get_error(obj->btf);
2771                 if (err) {
2772                         obj->btf = NULL;
2773                         pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
2774                         goto out;
2775                 }
2776                 /* enforce 8-byte pointers for BPF-targeted BTFs (BPF is always 64-bit) */
2777                 btf__set_pointer_size(obj->btf, 8);
2778         }
2779         if (btf_ext_data) {
2780                 struct btf_ext_info *ext_segs[3];
2781                 int seg_num, sec_num;
2782
2783                 if (!obj->btf) {
2784                         pr_debug("Ignoring ELF section %s because the ELF section %s it depends on was not found.\n",
2785                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2786                         goto out;
2787                 }
2788                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
2789                 err = libbpf_get_error(obj->btf_ext);
2790                 if (err) {
2791                         pr_warn("Error loading ELF section %s: %d. Ignoring it and continuing.\n",
2792                                 BTF_EXT_ELF_SEC, err);
2793                         obj->btf_ext = NULL;
2794                         goto out;
2795                 }
2796
2797                 /* setup .BTF.ext to ELF section mapping */
2798                 ext_segs[0] = &obj->btf_ext->func_info;
2799                 ext_segs[1] = &obj->btf_ext->line_info;
2800                 ext_segs[2] = &obj->btf_ext->core_relo_info;
2801                 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
2802                         struct btf_ext_info *seg = ext_segs[seg_num];
2803                         const struct btf_ext_info_sec *sec;
2804                         const char *sec_name;
2805                         Elf_Scn *scn;
2806
2807                         if (seg->sec_cnt == 0)
2808                                 continue;
2809
2810                         seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
2811                         if (!seg->sec_idxs) {
2812                                 err = -ENOMEM;
2813                                 goto out;
2814                         }
2815
2816                         sec_num = 0;
2817                         for_each_btf_ext_sec(seg, sec) {
2818                                 /* preventively increment index to avoid doing
2819                                  * this before every continue below
2820                                  */
2821                                 sec_num++;
2822
2823                                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
2824                                 if (str_is_empty(sec_name))
2825                                         continue;
2826                                 scn = elf_sec_by_name(obj, sec_name);
2827                                 if (!scn)
2828                                         continue;
2829
2830                                 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
2831                         }
2832                 }
2833         }
2834 out:
2835         if (err && libbpf_needs_btf(obj)) {
2836                 pr_warn("BTF is required, but is missing or corrupted.\n");
2837                 return err;
2838         }
2839         return 0;
2840 }
2841
2842 static int compare_vsi_off(const void *_a, const void *_b)
2843 {
2844         const struct btf_var_secinfo *a = _a;
2845         const struct btf_var_secinfo *b = _b;
2846
2847         return a->offset - b->offset;
2848 }
2849
2850 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
2851                              struct btf_type *t)
2852 {
2853         __u32 size = 0, off = 0, i, vars = btf_vlen(t);
2854         const char *name = btf__name_by_offset(btf, t->name_off);
2855         const struct btf_type *t_var;
2856         struct btf_var_secinfo *vsi;
2857         const struct btf_var *var;
2858         int ret;
2859
2860         if (!name) {
2861                 pr_debug("No name found in string section for DATASEC kind.\n");
2862                 return -ENOENT;
2863         }
2864
2865         /* .extern datasec size and var offsets were set correctly during
2866          * extern collection step, so just skip straight to sorting variables
2867          */
2868         if (t->size)
2869                 goto sort_vars;
2870
2871         ret = find_elf_sec_sz(obj, name, &size);
2872         if (ret || !size) {
2873                 pr_debug("Invalid size for section %s: %u bytes\n", name, size);
2874                 return -ENOENT;
2875         }
2876
2877         t->size = size;
2878
2879         for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
2880                 t_var = btf__type_by_id(btf, vsi->type);
2881                 if (!t_var || !btf_is_var(t_var)) {
2882                         pr_debug("Non-VAR type seen in section %s\n", name);
2883                         return -EINVAL;
2884                 }
2885
2886                 var = btf_var(t_var);
2887                 if (var->linkage == BTF_VAR_STATIC)
2888                         continue;
2889
2890                 name = btf__name_by_offset(btf, t_var->name_off);
2891                 if (!name) {
2892                         pr_debug("No name found in string section for VAR kind\n");
2893                         return -ENOENT;
2894                 }
2895
2896                 ret = find_elf_var_offset(obj, name, &off);
2897                 if (ret) {
2898                         pr_debug("No offset found in symbol table for VAR %s\n",
2899                                  name);
2900                         return -ENOENT;
2901                 }
2902
2903                 vsi->offset = off;
2904         }
2905
2906 sort_vars:
2907         qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
2908         return 0;
2909 }
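
/* Example (editor's illustration; the variable name is hypothetical): for a
 * BPF-side global like
 *
 *   int pkt_count = 1;    - lands in the .data section
 *
 * clang emits DATASEC '.data' with size 0 and a zero var offset;
 * btf_fixup_datasec() above patches the size from the ELF section header and
 * the offset from the symbol table, then sorts vars by offset.
 */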
2910
2911 static int btf_finalize_data(struct bpf_object *obj, struct btf *btf)
2912 {
2913         int err = 0;
2914         __u32 i, n = btf__type_cnt(btf);
2915
2916         for (i = 1; i < n; i++) {
2917                 struct btf_type *t = btf_type_by_id(btf, i);
2918
2919                 /* Loader needs to fix up some of the things compiler
2920                  * couldn't get its hands on while emitting BTF. This
2921                  * is section size and global variable offset. We use
2922                  * the info from the ELF itself for this purpose.
2923                  */
2924                 if (btf_is_datasec(t)) {
2925                         err = btf_fixup_datasec(obj, btf, t);
2926                         if (err)
2927                                 break;
2928                 }
2929         }
2930
2931         return libbpf_err(err);
2932 }
2933
2934 static int bpf_object__finalize_btf(struct bpf_object *obj)
2935 {
2936         int err;
2937
2938         if (!obj->btf)
2939                 return 0;
2940
2941         err = btf_finalize_data(obj, obj->btf);
2942         if (err) {
2943                 pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);
2944                 return err;
2945         }
2946
2947         return 0;
2948 }
2949
2950 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
2951 {
2952         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
2953             prog->type == BPF_PROG_TYPE_LSM)
2954                 return true;
2955
2956         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
2957          * also need vmlinux BTF
2958          */
2959         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
2960                 return true;
2961
2962         return false;
2963 }
2964
2965 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
2966 {
2967         struct bpf_program *prog;
2968         int i;
2969
2970         /* CO-RE relocations need kernel BTF, but only when btf_custom_path
2971          * is not specified
2972          */
2973         if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
2974                 return true;
2975
2976         /* Support for typed ksyms needs kernel BTF */
2977         for (i = 0; i < obj->nr_extern; i++) {
2978                 const struct extern_desc *ext;
2979
2980                 ext = &obj->externs[i];
2981                 if (ext->type == EXT_KSYM && ext->ksym.type_id)
2982                         return true;
2983         }
2984
2985         bpf_object__for_each_program(prog, obj) {
2986                 if (!prog->autoload)
2987                         continue;
2988                 if (prog_needs_vmlinux_btf(prog))
2989                         return true;
2990         }
2991
2992         return false;
2993 }
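
/* Example (editor's illustration): any one of the following in an object
 * makes obj_needs_vmlinux_btf() return true:
 *
 *   SEC("fentry/do_unlinkat")                  - BPF_PROG_TYPE_TRACING prog
 *   SEC("lsm/file_open")                       - BPF_PROG_TYPE_LSM prog
 *   extern const struct rq runqueues __ksym;   - typed ksym extern
 *   any CO-RE relocation (e.g. BPF_CORE_READ), unless btf_custom_path is set
 */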
2994
2995 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
2996 {
2997         int err;
2998
2999         /* btf_vmlinux could be loaded earlier */
3000         if (obj->btf_vmlinux || obj->gen_loader)
3001                 return 0;
3002
3003         if (!force && !obj_needs_vmlinux_btf(obj))
3004                 return 0;
3005
3006         obj->btf_vmlinux = btf__load_vmlinux_btf();
3007         err = libbpf_get_error(obj->btf_vmlinux);
3008         if (err) {
3009                 pr_warn("Error loading vmlinux BTF: %d\n", err);
3010                 obj->btf_vmlinux = NULL;
3011                 return err;
3012         }
3013         return 0;
3014 }
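
/* Editor's note: btf__load_vmlinux_btf() reads /sys/kernel/btf/vmlinux when
 * available and otherwise probes well-known on-disk vmlinux locations, so the
 * warning above usually means the kernel was built without
 * CONFIG_DEBUG_INFO_BTF.
 */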
3015
3016 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3017 {
3018         struct btf *kern_btf = obj->btf;
3019         bool btf_mandatory, sanitize;
3020         int i, err = 0;
3021
3022         if (!obj->btf)
3023                 return 0;
3024
3025         if (!kernel_supports(obj, FEAT_BTF)) {
3026                 if (kernel_needs_btf(obj)) {
3027                         err = -EOPNOTSUPP;
3028                         goto report;
3029                 }
3030                 pr_debug("Kernel doesn't support BTF, skipping its upload.\n");
3031                 return 0;
3032         }
3033
3034         /* Even though some subprogs are global/weak, the user might prefer the
3035          * more permissive BPF verification that the verifier performs for static
3036          * functions, since it takes into account more context from the caller
3037          * functions. In such cases, subprogs should be marked with
3038          * __attribute__((visibility("hidden"))); libbpf will then adjust the
3039          * corresponding FUNC BTF type to static, triggering the more involved
3040          * verification (see the illustrative example after this function).
3041          */
3042         for (i = 0; i < obj->nr_programs; i++) {
3043                 struct bpf_program *prog = &obj->programs[i];
3044                 struct btf_type *t;
3045                 const char *name;
3046                 int j, n;
3047
3048                 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3049                         continue;
3050
3051                 n = btf__type_cnt(obj->btf);
3052                 for (j = 1; j < n; j++) {
3053                         t = btf_type_by_id(obj->btf, j);
3054                         if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3055                                 continue;
3056
3057                         name = btf__str_by_offset(obj->btf, t->name_off);
3058                         if (strcmp(name, prog->name) != 0)
3059                                 continue;
3060
3061                         t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3062                         break;
3063                 }
3064         }
3065
3066         sanitize = btf_needs_sanitization(obj);
3067         if (sanitize) {
3068                 const void *raw_data;
3069                 __u32 sz;
3070
3071                 /* clone BTF to sanitize a copy and leave the original intact */
3072                 raw_data = btf__raw_data(obj->btf, &sz);
3073                 kern_btf = btf__new(raw_data, sz);
3074                 err = libbpf_get_error(kern_btf);
3075                 if (err)
3076                         return err;
3077
3078                 /* enforce 8-byte pointers for BPF-targeted BTFs */
3079                 btf__set_pointer_size(obj->btf, 8);
3080                 err = bpf_object__sanitize_btf(obj, kern_btf);
3081                 if (err)
3082                         return err;
3083         }
3084
3085         if (obj->gen_loader) {
3086                 __u32 raw_size = 0;
3087                 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3088
3089                 if (!raw_data)
3090                         return -ENOMEM;
3091                 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3092                 /* Pretend to have a valid FD to pass various fd >= 0 checks.
3093                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3094                  */
3095                 btf__set_fd(kern_btf, 0);
3096         } else {
3097                 /* currently BPF_BTF_LOAD only supports log_level 1 */
3098                 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3099                                            obj->log_level ? 1 : 0);
3100         }
3101         if (sanitize) {
3102                 if (!err) {
3103                         /* move fd to libbpf's BTF */
3104                         btf__set_fd(obj->btf, btf__fd(kern_btf));
3105                         btf__set_fd(kern_btf, -1);
3106                 }
3107                 btf__free(kern_btf);
3108         }
3109 report:
3110         if (err) {
3111                 btf_mandatory = kernel_needs_btf(obj);
3112                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3113                         btf_mandatory ? "BTF is mandatory, can't proceed."
3114                                       : "BTF is optional, ignoring.");
3115                 if (!btf_mandatory)
3116                         err = 0;
3117         }
3118         return err;
3119 }
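
/* Example (editor's sketch; the subprog name is hypothetical). On the BPF
 * side, the visibility("hidden") mechanism described above looks like:
 *
 *   __hidden int my_subprog(int x)
 *   {
 *           return x + 1;
 *   }
 *
 * where __hidden is bpf_helpers.h shorthand for
 * __attribute__((visibility("hidden"))). The loop in
 * bpf_object__sanitize_and_load_btf() then flips the subprog's FUNC BTF
 * linkage from BTF_FUNC_GLOBAL to BTF_FUNC_STATIC before load.
 */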
3120
3121 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3122 {
3123         const char *name;
3124
3125         name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3126         if (!name) {
3127                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3128                         off, obj->path, elf_errmsg(-1));
3129                 return NULL;
3130         }
3131
3132         return name;
3133 }
3134
3135 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3136 {
3137         const char *name;
3138
3139         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3140         if (!name) {
3141                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3142                         off, obj->path, elf_errmsg(-1));
3143                 return NULL;
3144         }
3145
3146         return name;
3147 }
3148
3149 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3150 {
3151         Elf_Scn *scn;
3152
3153         scn = elf_getscn(obj->efile.elf, idx);
3154         if (!scn) {
3155                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3156                         idx, obj->path, elf_errmsg(-1));
3157                 return NULL;
3158         }
3159         return scn;
3160 }
3161
3162 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3163 {
3164         Elf_Scn *scn = NULL;
3165         Elf *elf = obj->efile.elf;
3166         const char *sec_name;
3167
3168         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3169                 sec_name = elf_sec_name(obj, scn);
3170                 if (!sec_name)
3171                         return NULL;
3172
3173                 if (strcmp(sec_name, name) != 0)
3174                         continue;
3175
3176                 return scn;
3177         }
3178         return NULL;
3179 }
3180
3181 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3182 {
3183         Elf64_Shdr *shdr;
3184
3185         if (!scn)
3186                 return NULL;
3187
3188         shdr = elf64_getshdr(scn);
3189         if (!shdr) {
3190                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3191                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3192                 return NULL;
3193         }
3194
3195         return shdr;
3196 }
3197
3198 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3199 {
3200         const char *name;
3201         Elf64_Shdr *sh;
3202
3203         if (!scn)
3204                 return NULL;
3205
3206         sh = elf_sec_hdr(obj, scn);
3207         if (!sh)
3208                 return NULL;
3209
3210         name = elf_sec_str(obj, sh->sh_name);
3211         if (!name) {
3212                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3213                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3214                 return NULL;
3215         }
3216
3217         return name;
3218 }
3219
3220 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3221 {
3222         Elf_Data *data;
3223
3224         if (!scn)
3225                 return NULL;
3226
3227         data = elf_getdata(scn, 0);
3228         if (!data) {
3229                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3230                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3231                         obj->path, elf_errmsg(-1));
3232                 return NULL;
3233         }
3234
3235         return data;
3236 }
3237
3238 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3239 {
3240         if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3241                 return NULL;
3242
3243         return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3244 }
3245
3246 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3247 {
3248         if (idx >= data->d_size / sizeof(Elf64_Rel))
3249                 return NULL;
3250
3251         return (Elf64_Rel *)data->d_buf + idx;
3252 }
3253
3254 static bool is_sec_name_dwarf(const char *name)
3255 {
3256         /* approximation, but the actual list is too long */
3257         return str_has_pfx(name, ".debug_");
3258 }
3259
3260 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3261 {
3262         /* no special handling of .strtab */
3263         if (hdr->sh_type == SHT_STRTAB)
3264                 return true;
3265
3266         /* ignore .llvm_addrsig section as well */
3267         if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3268                 return true;
3269
3270         /* having no subprograms leads to an empty .text section; ignore it */
3271         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3272             strcmp(name, ".text") == 0)
3273                 return true;
3274
3275         /* DWARF sections */
3276         if (is_sec_name_dwarf(name))
3277                 return true;
3278
3279         if (str_has_pfx(name, ".rel")) {
3280                 name += sizeof(".rel") - 1;
3281                 /* DWARF section relocations */
3282                 if (is_sec_name_dwarf(name))
3283                         return true;
3284
3285                 /* .BTF and .BTF.ext don't need relocations */
3286                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3287                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
3288                         return true;
3289         }
3290
3291         return false;
3292 }
3293
3294 static int cmp_progs(const void *_a, const void *_b)
3295 {
3296         const struct bpf_program *a = _a;
3297         const struct bpf_program *b = _b;
3298
3299         if (a->sec_idx != b->sec_idx)
3300                 return a->sec_idx < b->sec_idx ? -1 : 1;
3301
3302         /* sec_insn_off can't be the same within the section */
3303         return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3304 }
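
/* Editor's note: cmp_progs imposes a total order on (sec_idx, sec_insn_off);
 * bpf_object__elf_collect() qsort()s obj->programs with it, which is what
 * allows find_prog_by_sec_insn() further below to use binary search.
 */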
3305
3306 static int bpf_object__elf_collect(struct bpf_object *obj)
3307 {
3308         struct elf_sec_desc *sec_desc;
3309         Elf *elf = obj->efile.elf;
3310         Elf_Data *btf_ext_data = NULL;
3311         Elf_Data *btf_data = NULL;
3312         int idx = 0, err = 0;
3313         const char *name;
3314         Elf_Data *data;
3315         Elf_Scn *scn;
3316         Elf64_Shdr *sh;
3317
3318         /* ELF section indices are 0-based, but sec #0 is a special "invalid"
3319          * section. Since the section count retrieved by elf_getshdrnum()
3320          * includes sec #0, it is already the exact array size needed to hold
3321          * all the sections.
3322          */
3323         if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3324                 pr_warn("elf: failed to get the number of sections for %s: %s\n",
3325                         obj->path, elf_errmsg(-1));
3326                 return -LIBBPF_ERRNO__FORMAT;
3327         }
3328         obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3329         if (!obj->efile.secs)
3330                 return -ENOMEM;
3331
3332         /* a bunch of ELF parsing functionality depends on processing symbols,
3333          * so do the first pass and find the symbol table
3334          */
3335         scn = NULL;
3336         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3337                 sh = elf_sec_hdr(obj, scn);
3338                 if (!sh)
3339                         return -LIBBPF_ERRNO__FORMAT;
3340
3341                 if (sh->sh_type == SHT_SYMTAB) {
3342                         if (obj->efile.symbols) {
3343                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3344                                 return -LIBBPF_ERRNO__FORMAT;
3345                         }
3346
3347                         data = elf_sec_data(obj, scn);
3348                         if (!data)
3349                                 return -LIBBPF_ERRNO__FORMAT;
3350
3351                         idx = elf_ndxscn(scn);
3352
3353                         obj->efile.symbols = data;
3354                         obj->efile.symbols_shndx = idx;
3355                         obj->efile.strtabidx = sh->sh_link;
3356                 }
3357         }
3358
3359         if (!obj->efile.symbols) {
3360                 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3361                         obj->path);
3362                 return -ENOENT;
3363         }
3364
3365         scn = NULL;
3366         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3367                 idx = elf_ndxscn(scn);
3368                 sec_desc = &obj->efile.secs[idx];
3369
3370                 sh = elf_sec_hdr(obj, scn);
3371                 if (!sh)
3372                         return -LIBBPF_ERRNO__FORMAT;
3373
3374                 name = elf_sec_str(obj, sh->sh_name);
3375                 if (!name)
3376                         return -LIBBPF_ERRNO__FORMAT;
3377
3378                 if (ignore_elf_section(sh, name))
3379                         continue;
3380
3381                 data = elf_sec_data(obj, scn);
3382                 if (!data)
3383                         return -LIBBPF_ERRNO__FORMAT;
3384
3385                 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3386                          idx, name, (unsigned long)data->d_size,
3387                          (int)sh->sh_link, (unsigned long)sh->sh_flags,
3388                          (int)sh->sh_type);
3389
3390                 if (strcmp(name, "license") == 0) {
3391                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3392                         if (err)
3393                                 return err;
3394                 } else if (strcmp(name, "version") == 0) {
3395                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3396                         if (err)
3397                                 return err;
3398                 } else if (strcmp(name, "maps") == 0) {
3399                         pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3400                         return -ENOTSUP;
3401                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3402                         obj->efile.btf_maps_shndx = idx;
3403                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3404                         if (sh->sh_type != SHT_PROGBITS)
3405                                 return -LIBBPF_ERRNO__FORMAT;
3406                         btf_data = data;
3407                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3408                         if (sh->sh_type != SHT_PROGBITS)
3409                                 return -LIBBPF_ERRNO__FORMAT;
3410                         btf_ext_data = data;
3411                 } else if (sh->sh_type == SHT_SYMTAB) {
3412                         /* already processed during the first pass above */
3413                 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3414                         if (sh->sh_flags & SHF_EXECINSTR) {
3415                                 if (strcmp(name, ".text") == 0)
3416                                         obj->efile.text_shndx = idx;
3417                                 err = bpf_object__add_programs(obj, data, name, idx);
3418                                 if (err)
3419                                         return err;
3420                         } else if (strcmp(name, DATA_SEC) == 0 ||
3421                                    str_has_pfx(name, DATA_SEC ".")) {
3422                                 sec_desc->sec_type = SEC_DATA;
3423                                 sec_desc->shdr = sh;
3424                                 sec_desc->data = data;
3425                         } else if (strcmp(name, RODATA_SEC) == 0 ||
3426                                    str_has_pfx(name, RODATA_SEC ".")) {
3427                                 sec_desc->sec_type = SEC_RODATA;
3428                                 sec_desc->shdr = sh;
3429                                 sec_desc->data = data;
3430                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
3431                                 obj->efile.st_ops_data = data;
3432                                 obj->efile.st_ops_shndx = idx;
3433                         } else {
3434                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3435                                         idx, name);
3436                         }
3437                 } else if (sh->sh_type == SHT_REL) {
3438                         int targ_sec_idx = sh->sh_info; /* points to other section */
3439
3440                         if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3441                             targ_sec_idx >= obj->efile.sec_cnt)
3442                                 return -LIBBPF_ERRNO__FORMAT;
3443
3444                         /* Only do relo for section with exec instructions */
3445                         if (!section_have_execinstr(obj, targ_sec_idx) &&
3446                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3447                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
3448                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3449                                         idx, name, targ_sec_idx,
3450                                         elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3451                                 continue;
3452                         }
3453
3454                         sec_desc->sec_type = SEC_RELO;
3455                         sec_desc->shdr = sh;
3456                         sec_desc->data = data;
3457                 } else if (sh->sh_type == SHT_NOBITS && strcmp(name, BSS_SEC) == 0) {
3458                         sec_desc->sec_type = SEC_BSS;
3459                         sec_desc->shdr = sh;
3460                         sec_desc->data = data;
3461                 } else {
3462                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3463                                 (size_t)sh->sh_size);
3464                 }
3465         }
3466
3467         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3468                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3469                 return -LIBBPF_ERRNO__FORMAT;
3470         }
3471
3472         /* sort BPF programs by section index and in-section instruction
3473          * offset for faster (binary) search */
3474         if (obj->nr_programs)
3475                 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3476
3477         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3478 }
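
/* Example (editor's illustration): a typical BPF object walked by the pass
 * above contains sections along these lines:
 *
 *   license             - license string (affects GPL-only helper access)
 *   .maps               - BTF-defined map definitions
 *   .text, SEC("...")   - executable BPF code (SHF_EXECINSTR)
 *   .data/.rodata/.bss  - global variable backing sections
 *   .BTF, .BTF.ext      - type info plus func/line/CO-RE relocation info
 *   .rel<sec>           - relocations, kept only for exec/maps/struct_ops
 */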
3479
3480 static bool sym_is_extern(const Elf64_Sym *sym)
3481 {
3482         int bind = ELF64_ST_BIND(sym->st_info);
3483         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3484         return sym->st_shndx == SHN_UNDEF &&
3485                (bind == STB_GLOBAL || bind == STB_WEAK) &&
3486                ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
3487 }
3488
3489 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
3490 {
3491         int bind = ELF64_ST_BIND(sym->st_info);
3492         int type = ELF64_ST_TYPE(sym->st_info);
3493
3494         /* in .text section */
3495         if (sym->st_shndx != text_shndx)
3496                 return false;
3497
3498         /* local function */
3499         if (bind == STB_LOCAL && type == STT_SECTION)
3500                 return true;
3501
3502         /* global function */
3503         return bind == STB_GLOBAL && type == STT_FUNC;
3504 }
3505
3506 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3507 {
3508         const struct btf_type *t;
3509         const char *tname;
3510         int i, n;
3511
3512         if (!btf)
3513                 return -ESRCH;
3514
3515         n = btf__type_cnt(btf);
3516         for (i = 1; i < n; i++) {
3517                 t = btf__type_by_id(btf, i);
3518
3519                 if (!btf_is_var(t) && !btf_is_func(t))
3520                         continue;
3521
3522                 tname = btf__name_by_offset(btf, t->name_off);
3523                 if (strcmp(tname, ext_name))
3524                         continue;
3525
3526                 if (btf_is_var(t) &&
3527                     btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3528                         return -EINVAL;
3529
3530                 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3531                         return -EINVAL;
3532
3533                 return i;
3534         }
3535
3536         return -ENOENT;
3537 }
3538
3539 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
3540         const struct btf_var_secinfo *vs;
3541         const struct btf_type *t;
3542         int i, j, n;
3543
3544         if (!btf)
3545                 return -ESRCH;
3546
3547         n = btf__type_cnt(btf);
3548         for (i = 1; i < n; i++) {
3549                 t = btf__type_by_id(btf, i);
3550
3551                 if (!btf_is_datasec(t))
3552                         continue;
3553
3554                 vs = btf_var_secinfos(t);
3555                 for (j = 0; j < btf_vlen(t); j++, vs++) {
3556                         if (vs->type == ext_btf_id)
3557                                 return i;
3558                 }
3559         }
3560
3561         return -ENOENT;
3562 }
3563
3564 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3565                                      bool *is_signed)
3566 {
3567         const struct btf_type *t;
3568         const char *name;
3569
3570         t = skip_mods_and_typedefs(btf, id, NULL);
3571         name = btf__name_by_offset(btf, t->name_off);
3572
3573         if (is_signed)
3574                 *is_signed = false;
3575         switch (btf_kind(t)) {
3576         case BTF_KIND_INT: {
3577                 int enc = btf_int_encoding(t);
3578
3579                 if (enc & BTF_INT_BOOL)
3580                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3581                 if (is_signed)
3582                         *is_signed = enc & BTF_INT_SIGNED;
3583                 if (t->size == 1)
3584                         return KCFG_CHAR;
3585                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3586                         return KCFG_UNKNOWN;
3587                 return KCFG_INT;
3588         }
3589         case BTF_KIND_ENUM:
3590                 if (t->size != 4)
3591                         return KCFG_UNKNOWN;
3592                 if (strcmp(name, "libbpf_tristate"))
3593                         return KCFG_UNKNOWN;
3594                 return KCFG_TRISTATE;
3595         case BTF_KIND_ENUM64:
3596                 if (strcmp(name, "libbpf_tristate"))
3597                         return KCFG_UNKNOWN;
3598                 return KCFG_TRISTATE;
3599         case BTF_KIND_ARRAY:
3600                 if (btf_array(t)->nelems == 0)
3601                         return KCFG_UNKNOWN;
3602                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3603                         return KCFG_UNKNOWN;
3604                 return KCFG_CHAR_ARR;
3605         default:
3606                 return KCFG_UNKNOWN;
3607         }
3608 }
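
/* Example (editor's illustration; the CONFIG_* names merely show what each
 * category looks like on the BPF side):
 *
 *   extern int CONFIG_HZ __kconfig;                      - KCFG_INT
 *   extern char CONFIG_C __kconfig;                      - KCFG_CHAR
 *   extern bool CONFIG_BPF_SYSCALL __kconfig;            - KCFG_BOOL
 *   extern enum libbpf_tristate CONFIG_BT __kconfig;     - KCFG_TRISTATE
 *   extern char CONFIG_CC_VERSION_TEXT[64] __kconfig;    - KCFG_CHAR_ARR
 */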
3609
3610 static int cmp_externs(const void *_a, const void *_b)
3611 {
3612         const struct extern_desc *a = _a;
3613         const struct extern_desc *b = _b;
3614
3615         if (a->type != b->type)
3616                 return a->type < b->type ? -1 : 1;
3617
3618         if (a->type == EXT_KCFG) {
3619                 /* descending order by alignment requirements */
3620                 if (a->kcfg.align != b->kcfg.align)
3621                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
3622                 /* ascending order by size, within same alignment class */
3623                 if (a->kcfg.sz != b->kcfg.sz)
3624                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3625         }
3626
3627         /* resolve ties by name */
3628         return strcmp(a->name, b->name);
3629 }
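
/* Editor's note: sorting kcfg externs by descending alignment (then ascending
 * size) lets the .kconfig layout pass in bpf_object__collect_externs() place
 * them sequentially with little or no padding from roundup(off, align).
 */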
3630
3631 static int find_int_btf_id(const struct btf *btf)
3632 {
3633         const struct btf_type *t;
3634         int i, n;
3635
3636         n = btf__type_cnt(btf);
3637         for (i = 1; i < n; i++) {
3638                 t = btf__type_by_id(btf, i);
3639
3640                 if (btf_is_int(t) && btf_int_bits(t) == 32)
3641                         return i;
3642         }
3643
3644         return 0;
3645 }
3646
3647 static int add_dummy_ksym_var(struct btf *btf)
3648 {
3649         int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3650         const struct btf_var_secinfo *vs;
3651         const struct btf_type *sec;
3652
3653         if (!btf)
3654                 return 0;
3655
3656         sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3657                                             BTF_KIND_DATASEC);
3658         if (sec_btf_id < 0)
3659                 return 0;
3660
3661         sec = btf__type_by_id(btf, sec_btf_id);
3662         vs = btf_var_secinfos(sec);
3663         for (i = 0; i < btf_vlen(sec); i++, vs++) {
3664                 const struct btf_type *vt;
3665
3666                 vt = btf__type_by_id(btf, vs->type);
3667                 if (btf_is_func(vt))
3668                         break;
3669         }
3670
3671         /* No func in ksyms sec.  No need to add dummy var. */
3672         if (i == btf_vlen(sec))
3673                 return 0;
3674
3675         int_btf_id = find_int_btf_id(btf);
3676         dummy_var_btf_id = btf__add_var(btf,
3677                                         "dummy_ksym",
3678                                         BTF_VAR_GLOBAL_ALLOCATED,
3679                                         int_btf_id);
3680         if (dummy_var_btf_id < 0)
3681                 pr_warn("cannot create a dummy_ksym var\n");
3682
3683         return dummy_var_btf_id;
3684 }
3685
3686 static int bpf_object__collect_externs(struct bpf_object *obj)
3687 {
3688         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3689         const struct btf_type *t;
3690         struct extern_desc *ext;
3691         int i, n, off, dummy_var_btf_id;
3692         const char *ext_name, *sec_name;
3693         Elf_Scn *scn;
3694         Elf64_Shdr *sh;
3695
3696         if (!obj->efile.symbols)
3697                 return 0;
3698
3699         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3700         sh = elf_sec_hdr(obj, scn);
3701         if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
3702                 return -LIBBPF_ERRNO__FORMAT;
3703
3704         dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3705         if (dummy_var_btf_id < 0)
3706                 return dummy_var_btf_id;
3707
3708         n = sh->sh_size / sh->sh_entsize;
3709         pr_debug("looking for externs among %d symbols...\n", n);
3710
3711         for (i = 0; i < n; i++) {
3712                 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
3713
3714                 if (!sym)
3715                         return -LIBBPF_ERRNO__FORMAT;
3716                 if (!sym_is_extern(sym))
3717                         continue;
3718                 ext_name = elf_sym_str(obj, sym->st_name);
3719                 if (!ext_name || !ext_name[0])
3720                         continue;
3721
3722                 ext = obj->externs;
3723                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3724                 if (!ext)
3725                         return -ENOMEM;
3726                 obj->externs = ext;
3727                 ext = &ext[obj->nr_extern];
3728                 memset(ext, 0, sizeof(*ext));
3729                 obj->nr_extern++;
3730
3731                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3732                 if (ext->btf_id <= 0) {
3733                         pr_warn("failed to find BTF for extern '%s': %d\n",
3734                                 ext_name, ext->btf_id);
3735                         return ext->btf_id;
3736                 }
3737                 t = btf__type_by_id(obj->btf, ext->btf_id);
3738                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3739                 ext->sym_idx = i;
3740                 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
3741
3742                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3743                 if (ext->sec_btf_id <= 0) {
3744                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3745                                 ext_name, ext->btf_id, ext->sec_btf_id);
3746                         return ext->sec_btf_id;
3747                 }
3748                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3749                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3750
3751                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3752                         if (btf_is_func(t)) {
3753                                 pr_warn("extern function %s is unsupported under %s section\n",
3754                                         ext->name, KCONFIG_SEC);
3755                                 return -ENOTSUP;
3756                         }
3757                         kcfg_sec = sec;
3758                         ext->type = EXT_KCFG;
3759                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3760                         if (ext->kcfg.sz <= 0) {
3761                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3762                                         ext_name, ext->kcfg.sz);
3763                                 return ext->kcfg.sz;
3764                         }
3765                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3766                         if (ext->kcfg.align <= 0) {
3767                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3768                                         ext_name, ext->kcfg.align);
3769                                 return -EINVAL;
3770                         }
3771                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3772                                                         &ext->kcfg.is_signed);
3773                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3774                                 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
3775                                 return -ENOTSUP;
3776                         }
3777                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3778                         ksym_sec = sec;
3779                         ext->type = EXT_KSYM;
3780                         skip_mods_and_typedefs(obj->btf, t->type,
3781                                                &ext->ksym.type_id);
3782                 } else {
3783                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3784                         return -ENOTSUP;
3785                 }
3786         }
3787         pr_debug("collected %d externs total\n", obj->nr_extern);
3788
3789         if (!obj->nr_extern)
3790                 return 0;
3791
3792         /* sort externs by type, for kcfg ones also by (align, size, name) */
3793         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3794
3795         /* for .ksyms section, we need to turn all externs into allocated
3796          * variables in BTF to pass kernel verification; we do this by
3797          * pretending that each extern is an allocated 4-byte int variable
3798          */
3799         if (ksym_sec) {
3800                 /* find existing 4-byte integer type in BTF to use for fake
3801                  * extern variables in DATASEC
3802                  */
3803                 int int_btf_id = find_int_btf_id(obj->btf);
3804                 /* For extern functions, the dummy_var added earlier
3805                  * is used to replace vs->type, and its name string
3806                  * is reused to fill in any missing parameter
3807                  * names.
3808                  */
3809                 const struct btf_type *dummy_var;
3810
3811                 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
3812                 for (i = 0; i < obj->nr_extern; i++) {
3813                         ext = &obj->externs[i];
3814                         if (ext->type != EXT_KSYM)
3815                                 continue;
3816                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3817                                  i, ext->sym_idx, ext->name);
3818                 }
3819
3820                 sec = ksym_sec;
3821                 n = btf_vlen(sec);
3822                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3823                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3824                         struct btf_type *vt;
3825
3826                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3827                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3828                         ext = find_extern_by_name(obj, ext_name);
3829                         if (!ext) {
3830                                 pr_warn("failed to find extern definition for BTF %s '%s'\n",
3831                                         btf_kind_str(vt), ext_name);
3832                                 return -ESRCH;
3833                         }
3834                         if (btf_is_func(vt)) {
3835                                 const struct btf_type *func_proto;
3836                                 struct btf_param *param;
3837                                 int j;
3838
3839                                 func_proto = btf__type_by_id(obj->btf,
3840                                                              vt->type);
3841                                 param = btf_params(func_proto);
3842                                 /* Reuse the dummy_var name string if a
3843                                  * func proto param has no name.
3844                                  */
3845                                 for (j = 0; j < btf_vlen(func_proto); j++)
3846                                         if (param[j].type && !param[j].name_off)
3847                                                 param[j].name_off =
3848                                                         dummy_var->name_off;
3849                                 vs->type = dummy_var_btf_id;
3850                                 vt->info &= ~0xffff;
3851                                 vt->info |= BTF_FUNC_GLOBAL;
3852                         } else {
3853                                 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3854                                 vt->type = int_btf_id;
3855                         }
3856                         vs->offset = off;
3857                         vs->size = sizeof(int);
3858                 }
3859                 sec->size = off;
3860         }
3861
3862         if (kcfg_sec) {
3863                 sec = kcfg_sec;
3864                 /* for kcfg externs calculate their offsets within a .kconfig map */
3865                 off = 0;
3866                 for (i = 0; i < obj->nr_extern; i++) {
3867                         ext = &obj->externs[i];
3868                         if (ext->type != EXT_KCFG)
3869                                 continue;
3870
3871                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3872                         off = ext->kcfg.data_off + ext->kcfg.sz;
3873                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3874                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3875                 }
3876                 sec->size = off;
3877                 n = btf_vlen(sec);
3878                 for (i = 0; i < n; i++) {
3879                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3880
3881                         t = btf__type_by_id(obj->btf, vs->type);
3882                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3883                         ext = find_extern_by_name(obj, ext_name);
3884                         if (!ext) {
3885                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3886                                         ext_name);
3887                                 return -ESRCH;
3888                         }
3889                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3890                         vs->offset = ext->kcfg.data_off;
3891                 }
3892         }
3893         return 0;
3894 }
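
/* Example (editor's illustration) of the extern flavors collected above, as
 * declared on the BPF side:
 *
 *   extern unsigned int LINUX_KERNEL_VERSION __kconfig;  - EXT_KCFG
 *   extern const struct rq runqueues __ksym;             - EXT_KSYM typed var
 *   extern void bpf_rcu_read_lock(void) __ksym;          - EXT_KSYM function
 *
 * kcfg externs get offsets into the .kconfig map as computed above; ksym
 * externs are rewritten into allocated vars (or the dummy var, for funcs) so
 * that kernel BTF validation passes.
 */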
3895
3896 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
3897 {
3898         return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3899 }
3900
3901 struct bpf_program *
3902 bpf_object__find_program_by_name(const struct bpf_object *obj,
3903                                  const char *name)
3904 {
3905         struct bpf_program *prog;
3906
3907         bpf_object__for_each_program(prog, obj) {
3908                 if (prog_is_subprog(obj, prog))
3909                         continue;
3910                 if (!strcmp(prog->name, name))
3911                         return prog;
3912         }
3913         return errno = ENOENT, NULL;
3914 }
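
/* Example usage (editor's sketch; "handle_exec" is a hypothetical program
 * name):
 *
 *   struct bpf_program *prog;
 *
 *   prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *   if (!prog)
 *           return -errno;    - errno is ENOENT when not found
 *
 * subprogs living in .text are deliberately skipped, so only entry-point
 * programs can be looked up by name.
 */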
3915
3916 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3917                                       int shndx)
3918 {
3919         switch (obj->efile.secs[shndx].sec_type) {
3920         case SEC_BSS:
3921         case SEC_DATA:
3922         case SEC_RODATA:
3923                 return true;
3924         default:
3925                 return false;
3926         }
3927 }
3928
3929 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3930                                       int shndx)
3931 {
3932         return shndx == obj->efile.btf_maps_shndx;
3933 }
3934
3935 static enum libbpf_map_type
3936 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3937 {
3938         if (shndx == obj->efile.symbols_shndx)
3939                 return LIBBPF_MAP_KCONFIG;
3940
3941         switch (obj->efile.secs[shndx].sec_type) {
3942         case SEC_BSS:
3943                 return LIBBPF_MAP_BSS;
3944         case SEC_DATA:
3945                 return LIBBPF_MAP_DATA;
3946         case SEC_RODATA:
3947                 return LIBBPF_MAP_RODATA;
3948         default:
3949                 return LIBBPF_MAP_UNSPEC;
3950         }
3951 }
3952
3953 static int bpf_program__record_reloc(struct bpf_program *prog,
3954                                      struct reloc_desc *reloc_desc,
3955                                      __u32 insn_idx, const char *sym_name,
3956                                      const Elf64_Sym *sym, const Elf64_Rel *rel)
3957 {
3958         struct bpf_insn *insn = &prog->insns[insn_idx];
3959         size_t map_idx, nr_maps = prog->obj->nr_maps;
3960         struct bpf_object *obj = prog->obj;
3961         __u32 shdr_idx = sym->st_shndx;
3962         enum libbpf_map_type type;
3963         const char *sym_sec_name;
3964         struct bpf_map *map;
3965
3966         if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
3967                 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
3968                         prog->name, sym_name, insn_idx, insn->code);
3969                 return -LIBBPF_ERRNO__RELOC;
3970         }
3971
3972         if (sym_is_extern(sym)) {
3973                 int sym_idx = ELF64_R_SYM(rel->r_info);
3974                 int i, n = obj->nr_extern;
3975                 struct extern_desc *ext;
3976
3977                 for (i = 0; i < n; i++) {
3978                         ext = &obj->externs[i];
3979                         if (ext->sym_idx == sym_idx)
3980                                 break;
3981                 }
3982                 if (i >= n) {
3983                         pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
3984                                 prog->name, sym_name, sym_idx);
3985                         return -LIBBPF_ERRNO__RELOC;
3986                 }
3987                 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
3988                          prog->name, i, ext->name, ext->sym_idx, insn_idx);
3989                 if (insn->code == (BPF_JMP | BPF_CALL))
3990                         reloc_desc->type = RELO_EXTERN_FUNC;
3991                 else
3992                         reloc_desc->type = RELO_EXTERN_VAR;
3993                 reloc_desc->insn_idx = insn_idx;
3994                 reloc_desc->sym_off = i; /* sym_off stores extern index */
3995                 return 0;
3996         }
3997
3998         /* sub-program call relocation */
3999         if (is_call_insn(insn)) {
4000                 if (insn->src_reg != BPF_PSEUDO_CALL) {
4001                         pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4002                         return -LIBBPF_ERRNO__RELOC;
4003                 }
4004                 /* text_shndx can be 0 if no default "main" program exists */
4005                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4006                         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4007                         pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4008                                 prog->name, sym_name, sym_sec_name);
4009                         return -LIBBPF_ERRNO__RELOC;
4010                 }
4011                 if (sym->st_value % BPF_INSN_SZ) {
4012                         pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4013                                 prog->name, sym_name, (size_t)sym->st_value);
4014                         return -LIBBPF_ERRNO__RELOC;
4015                 }
4016                 reloc_desc->type = RELO_CALL;
4017                 reloc_desc->insn_idx = insn_idx;
4018                 reloc_desc->sym_off = sym->st_value;
4019                 return 0;
4020         }
4021
4022         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4023                 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4024                         prog->name, sym_name, shdr_idx);
4025                 return -LIBBPF_ERRNO__RELOC;
4026         }
4027
4028         /* loading subprog addresses */
4029         if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4030                 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
4031                  * local_func: sym->st_value = 0, insn->imm = offset in the section.
4032                  */
4033                 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4034                         pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4035                                 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4036                         return -LIBBPF_ERRNO__RELOC;
4037                 }
4038
4039                 reloc_desc->type = RELO_SUBPROG_ADDR;
4040                 reloc_desc->insn_idx = insn_idx;
4041                 reloc_desc->sym_off = sym->st_value;
4042                 return 0;
4043         }
4044
4045         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4046         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4047
4048         /* generic map reference relocation */
4049         if (type == LIBBPF_MAP_UNSPEC) {
4050                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4051                         pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4052                                 prog->name, sym_name, sym_sec_name);
4053                         return -LIBBPF_ERRNO__RELOC;
4054                 }
4055                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4056                         map = &obj->maps[map_idx];
4057                         if (map->libbpf_type != type ||
4058                             map->sec_idx != sym->st_shndx ||
4059                             map->sec_offset != sym->st_value)
4060                                 continue;
4061                         pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4062                                  prog->name, map_idx, map->name, map->sec_idx,
4063                                  map->sec_offset, insn_idx);
4064                         break;
4065                 }
4066                 if (map_idx >= nr_maps) {
4067                         pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4068                                 prog->name, sym_sec_name, (size_t)sym->st_value);
4069                         return -LIBBPF_ERRNO__RELOC;
4070                 }
4071                 reloc_desc->type = RELO_LD64;
4072                 reloc_desc->insn_idx = insn_idx;
4073                 reloc_desc->map_idx = map_idx;
4074                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4075                 return 0;
4076         }
4077
4078         /* global data map relocation */
4079         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4080                 pr_warn("prog '%s': bad data relo against section '%s'\n",
4081                         prog->name, sym_sec_name);
4082                 return -LIBBPF_ERRNO__RELOC;
4083         }
4084         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4085                 map = &obj->maps[map_idx];
4086                 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4087                         continue;
4088                 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4089                          prog->name, map_idx, map->name, map->sec_idx,
4090                          map->sec_offset, insn_idx);
4091                 break;
4092         }
4093         if (map_idx >= nr_maps) {
4094                 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4095                         prog->name, sym_sec_name);
4096                 return -LIBBPF_ERRNO__RELOC;
4097         }
4098
4099         reloc_desc->type = RELO_DATA;
4100         reloc_desc->insn_idx = insn_idx;
4101         reloc_desc->map_idx = map_idx;
4102         reloc_desc->sym_off = sym->st_value;
4103         return 0;
4104 }
4105
4106 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4107 {
4108         return insn_idx >= prog->sec_insn_off &&
4109                insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4110 }
4111
4112 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4113                                                  size_t sec_idx, size_t insn_idx)
4114 {
4115         int l = 0, r = obj->nr_programs - 1, m;
4116         struct bpf_program *prog;
4117
4118         while (l < r) {
4119                 m = l + (r - l + 1) / 2;
4120                 prog = &obj->programs[m];
4121
4122                 if (prog->sec_idx < sec_idx ||
4123                     (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4124                         l = m;
4125                 else
4126                         r = m - 1;
4127         }
4128         /* the matching program could be at index l, but it still might be
4129          * the wrong one, so double-check the conditions one last time
4130          */
4131         prog = &obj->programs[l];
4132         if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4133                 return prog;
4134         return NULL;
4135 }
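/* Illustrative sketch (example values, not from the source): if a section
 * holds two subprograms covering insn ranges [0, 5) and [5, 12), a relocation
 * at insn_idx 7 lands in the second one, whose sec_insn_off is 5. The biased
 * binary search above converges on the right-most program with
 * sec_insn_off <= insn_idx, and prog_contains_insn() then confirms the hit.
 */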
4136
4137 static int
4138 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4139 {
4140         const char *relo_sec_name, *sec_name;
4141         size_t sec_idx = shdr->sh_info, sym_idx;
4142         struct bpf_program *prog;
4143         struct reloc_desc *relos;
4144         int err, i, nrels;
4145         const char *sym_name;
4146         __u32 insn_idx;
4147         Elf_Scn *scn;
4148         Elf_Data *scn_data;
4149         Elf64_Sym *sym;
4150         Elf64_Rel *rel;
4151
4152         if (sec_idx >= obj->efile.sec_cnt)
4153                 return -EINVAL;
4154
4155         scn = elf_sec_by_idx(obj, sec_idx);
4156         scn_data = elf_sec_data(obj, scn);
4157
4158         relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4159         sec_name = elf_sec_name(obj, scn);
4160         if (!relo_sec_name || !sec_name)
4161                 return -EINVAL;
4162
4163         pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4164                  relo_sec_name, sec_idx, sec_name);
4165         nrels = shdr->sh_size / shdr->sh_entsize;
4166
4167         for (i = 0; i < nrels; i++) {
4168                 rel = elf_rel_by_idx(data, i);
4169                 if (!rel) {
4170                         pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4171                         return -LIBBPF_ERRNO__FORMAT;
4172                 }
4173
4174                 sym_idx = ELF64_R_SYM(rel->r_info);
4175                 sym = elf_sym_by_idx(obj, sym_idx);
4176                 if (!sym) {
4177                         pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4178                                 relo_sec_name, sym_idx, i);
4179                         return -LIBBPF_ERRNO__FORMAT;
4180                 }
4181
4182                 if (sym->st_shndx >= obj->efile.sec_cnt) {
4183                         pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4184                                 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4185                         return -LIBBPF_ERRNO__FORMAT;
4186                 }
4187
4188                 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4189                         pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4190                                 relo_sec_name, (size_t)rel->r_offset, i);
4191                         return -LIBBPF_ERRNO__FORMAT;
4192                 }
4193
4194                 insn_idx = rel->r_offset / BPF_INSN_SZ;
4195                 /* relocations against static functions are recorded as
4196                  * relocations against the section that contains the function;
4197                  * in that case, the symbol will be STT_SECTION and sym.st_name
4198                  * will point to an empty string (0), so fetch the section name
4199                  * instead
4200                  */
4201                 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4202                         sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4203                 else
4204                         sym_name = elf_sym_str(obj, sym->st_name);
4205                 sym_name = sym_name ?: "<?";
4206
4207                 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4208                          relo_sec_name, i, insn_idx, sym_name);
4209
4210                 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4211                 if (!prog) {
4212                         pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4213                                 relo_sec_name, i, sec_name, insn_idx);
4214                         continue;
4215                 }
4216
4217                 relos = libbpf_reallocarray(prog->reloc_desc,
4218                                             prog->nr_reloc + 1, sizeof(*relos));
4219                 if (!relos)
4220                         return -ENOMEM;
4221                 prog->reloc_desc = relos;
4222
4223                 /* adjust insn_idx to local BPF program frame of reference */
4224                 insn_idx -= prog->sec_insn_off;
4225                 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4226                                                 insn_idx, sym_name, sym, rel);
4227                 if (err)
4228                         return err;
4229
4230                 prog->nr_reloc++;
4231         }
4232         return 0;
4233 }
4234
4235 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
4236 {
4237         int id;
4238
4239         if (!obj->btf)
4240                 return -ENOENT;
4241
4242         /* if it's a BTF-defined map, we don't need to search for type IDs.
4243          * A struct_ops map doesn't need btf_key_type_id and
4244          * btf_value_type_id either.
4245          */
4246         if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4247                 return 0;
4248
4249         /*
4250          * LLVM annotates global data differently in BTF, that is,
4251          * only as '.data', '.bss' or '.rodata'.
4252          */
4253         if (!bpf_map__is_internal(map))
4254                 return -ENOENT;
4255
4256         id = btf__find_by_name(obj->btf, map->real_name);
4257         if (id < 0)
4258                 return id;
4259
4260         map->btf_key_type_id = 0;
4261         map->btf_value_type_id = id;
4262         return 0;
4263 }
4264
4265 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4266 {
4267         char file[PATH_MAX], buff[4096];
4268         FILE *fp;
4269         __u32 val;
4270         int err;
4271
4272         snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4273         memset(info, 0, sizeof(*info));
4274
4275         fp = fopen(file, "r");
4276         if (!fp) {
4277                 err = -errno;
4278                 pr_warn("failed to open %s: %d. No procfs support?\n", file,
4279                         err);
4280                 return err;
4281         }
4282
4283         while (fgets(buff, sizeof(buff), fp)) {
4284                 if (sscanf(buff, "map_type:\t%u", &val) == 1)
4285                         info->type = val;
4286                 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4287                         info->key_size = val;
4288                 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4289                         info->value_size = val;
4290                 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4291                         info->max_entries = val;
4292                 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4293                         info->map_flags = val;
4294         }
4295
4296         fclose(fp);
4297
4298         return 0;
4299 }
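/* For reference, a BPF map's fdinfo entry looks roughly like this
 * (illustrative values):
 *
 *	map_type:	2
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	1024
 *	map_flags:	0x0
 *
 * map_flags is printed by the kernel in hex, which is why it is parsed with
 * %i (auto-detects base) rather than %u above.
 */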
4300
4301 bool bpf_map__autocreate(const struct bpf_map *map)
4302 {
4303         return map->autocreate;
4304 }
4305
4306 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4307 {
4308         if (map->obj->loaded)
4309                 return libbpf_err(-EBUSY);
4310
4311         map->autocreate = autocreate;
4312         return 0;
4313 }
4314
4315 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4316 {
4317         struct bpf_map_info info;
4318         __u32 len = sizeof(info), name_len;
4319         int new_fd, err;
4320         char *new_name;
4321
4322         memset(&info, 0, len);
4323         err = bpf_obj_get_info_by_fd(fd, &info, &len);
4324         if (err && errno == EINVAL)
4325                 err = bpf_get_map_info_from_fdinfo(fd, &info);
4326         if (err)
4327                 return libbpf_err(err);
4328
4329         name_len = strlen(info.name);
4330         if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4331                 new_name = strdup(map->name);
4332         else
4333                 new_name = strdup(info.name);
4334
4335         if (!new_name)
4336                 return libbpf_err(-errno);
4337
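        /* Reserve a currently unused FD number by opening "/", then turn it
         * into an O_CLOEXEC duplicate of the map FD via dup3(). The caller's
         * original fd stays open and owned by the caller.
         */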
4338         new_fd = open("/", O_RDONLY | O_CLOEXEC);
4339         if (new_fd < 0) {
4340                 err = -errno;
4341                 goto err_free_new_name;
4342         }
4343
4344         new_fd = dup3(fd, new_fd, O_CLOEXEC);
4345         if (new_fd < 0) {
4346                 err = -errno;
4347                 goto err_close_new_fd;
4348         }
4349
4350         err = zclose(map->fd);
4351         if (err) {
4352                 err = -errno;
4353                 goto err_close_new_fd;
4354         }
4355         free(map->name);
4356
4357         map->fd = new_fd;
4358         map->name = new_name;
4359         map->def.type = info.type;
4360         map->def.key_size = info.key_size;
4361         map->def.value_size = info.value_size;
4362         map->def.max_entries = info.max_entries;
4363         map->def.map_flags = info.map_flags;
4364         map->btf_key_type_id = info.btf_key_type_id;
4365         map->btf_value_type_id = info.btf_value_type_id;
4366         map->reused = true;
4367         map->map_extra = info.map_extra;
4368
4369         return 0;
4370
4371 err_close_new_fd:
4372         close(new_fd);
4373 err_free_new_name:
4374         free(new_name);
4375         return libbpf_err(err);
4376 }
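/* A minimal usage sketch (map name and pin path are hypothetical):
 *
 *	struct bpf_map *m = bpf_object__find_map_by_name(obj, "my_map");
 *	int fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *
 *	if (m && fd >= 0)
 *		int err = bpf_map__reuse_fd(m, fd);
 *
 * On success the map's definition is overwritten with the existing map's
 * parameters; since the FD is duplicated internally, the caller may close
 * its own fd afterwards.
 */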
4377
4378 __u32 bpf_map__max_entries(const struct bpf_map *map)
4379 {
4380         return map->def.max_entries;
4381 }
4382
4383 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4384 {
4385         if (!bpf_map_type__is_map_in_map(map->def.type))
4386                 return errno = EINVAL, NULL;
4387
4388         return map->inner_map;
4389 }
4390
4391 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4392 {
4393         if (map->obj->loaded)
4394                 return libbpf_err(-EBUSY);
4395
4396         map->def.max_entries = max_entries;
4397
4398         /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
4399         if (map_is_ringbuf(map))
4400                 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
4401
4402         return 0;
4403 }
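/* For BPF_MAP_TYPE_RINGBUF, max_entries is a byte size that the kernel
 * requires to be a power-of-2 multiple of the page size, hence the
 * adjustment above. E.g., assuming 4KB pages (sketch):
 *
 *	bpf_map__set_max_entries(map, 1000);	 // stored as 4096
 *	bpf_map__set_max_entries(map, 3 * 4096); // stored as 4 * 4096 = 16384
 */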
4404
4405 static int
4406 bpf_object__probe_loading(struct bpf_object *obj)
4407 {
4408         char *cp, errmsg[STRERR_BUFSIZE];
4409         struct bpf_insn insns[] = {
4410                 BPF_MOV64_IMM(BPF_REG_0, 0),
4411                 BPF_EXIT_INSN(),
4412         };
4413         int ret, insn_cnt = ARRAY_SIZE(insns);
4414
4415         if (obj->gen_loader)
4416                 return 0;
4417
4418         ret = bump_rlimit_memlock();
4419         if (ret)
4420                 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
4421
4422         /* make sure basic loading works */
4423         ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4424         if (ret < 0)
4425                 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
4426         if (ret < 0) {
4427                 ret = errno;
4428                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4429                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4430                         "program. Make sure your kernel supports BPF "
4431                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4432                         "set to a big enough value.\n", __func__, cp, ret);
4433                 return -ret;
4434         }
4435         close(ret);
4436
4437         return 0;
4438 }
4439
4440 static int probe_fd(int fd)
4441 {
4442         if (fd >= 0)
4443                 close(fd);
4444         return fd >= 0;
4445 }
4446
4447 static int probe_kern_prog_name(void)
4448 {
4449         const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
4450         struct bpf_insn insns[] = {
4451                 BPF_MOV64_IMM(BPF_REG_0, 0),
4452                 BPF_EXIT_INSN(),
4453         };
4454         union bpf_attr attr;
4455         int ret;
4456
4457         memset(&attr, 0, attr_sz);
4458         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4459         attr.license = ptr_to_u64("GPL");
4460         attr.insns = ptr_to_u64(insns);
4461         attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
4462         libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
4463
4464         /* make sure loading with name works */
4465         ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
4466         return probe_fd(ret);
4467 }
4468
4469 static int probe_kern_global_data(void)
4470 {
4471         char *cp, errmsg[STRERR_BUFSIZE];
4472         struct bpf_insn insns[] = {
4473                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
4474                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
4475                 BPF_MOV64_IMM(BPF_REG_0, 0),
4476                 BPF_EXIT_INSN(),
4477         };
4478         int ret, map, insn_cnt = ARRAY_SIZE(insns);
4479
4480         map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
4481         if (map < 0) {
4482                 ret = -errno;
4483                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4484                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4485                         __func__, cp, -ret);
4486                 return ret;
4487         }
4488
4489         insns[0].imm = map;
4490
4491         ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4492         close(map);
4493         return probe_fd(ret);
4494 }
4495
4496 static int probe_kern_btf(void)
4497 {
4498         static const char strs[] = "\0int";
4499         __u32 types[] = {
4500                 /* int */
4501                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4502         };
4503
4504         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4505                                              strs, sizeof(strs)));
4506 }
4507
4508 static int probe_kern_btf_func(void)
4509 {
4510         static const char strs[] = "\0int\0x\0a";
4511         /* void x(int a) {} */
4512         __u32 types[] = {
4513                 /* int */
4514                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4515                 /* FUNC_PROTO */                                /* [2] */
4516                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4517                 BTF_PARAM_ENC(7, 1),
4518                 /* FUNC x */                                    /* [3] */
4519                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
4520         };
4521
4522         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4523                                              strs, sizeof(strs)));
4524 }
4525
4526 static int probe_kern_btf_func_global(void)
4527 {
4528         static const char strs[] = "\0int\0x\0a";
4529         /* void x(int a) {} (global, non-static function) */
4530         __u32 types[] = {
4531                 /* int */
4532                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4533                 /* FUNC_PROTO */                                /* [2] */
4534                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4535                 BTF_PARAM_ENC(7, 1),
4536                 /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
4537                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
4538         };
4539
4540         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4541                                              strs, sizeof(strs)));
4542 }
4543
4544 static int probe_kern_btf_datasec(void)
4545 {
4546         static const char strs[] = "\0x\0.data";
4547         /* static int a; */
4548         __u32 types[] = {
4549                 /* int */
4550                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4551                 /* VAR x */                                     /* [2] */
4552                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4553                 BTF_VAR_STATIC,
4554                 /* DATASEC val */                               /* [3] */
4555                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
4556                 BTF_VAR_SECINFO_ENC(2, 0, 4),
4557         };
4558
4559         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4560                                              strs, sizeof(strs)));
4561 }
4562
4563 static int probe_kern_btf_float(void)
4564 {
4565         static const char strs[] = "\0float";
4566         __u32 types[] = {
4567                 /* float */
4568                 BTF_TYPE_FLOAT_ENC(1, 4),
4569         };
4570
4571         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4572                                              strs, sizeof(strs)));
4573 }
4574
4575 static int probe_kern_btf_decl_tag(void)
4576 {
4577         static const char strs[] = "\0tag";
4578         __u32 types[] = {
4579                 /* int */
4580                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4581                 /* VAR x */                                     /* [2] */
4582                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4583                 BTF_VAR_STATIC,
4584                 /* attr */
4585                 BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
4586         };
4587
4588         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4589                                              strs, sizeof(strs)));
4590 }
4591
4592 static int probe_kern_btf_type_tag(void)
4593 {
4594         static const char strs[] = "\0tag";
4595         __u32 types[] = {
4596                 /* int */
4597                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),          /* [1] */
4598                 /* attr */
4599                 BTF_TYPE_TYPE_TAG_ENC(1, 1),                            /* [2] */
4600                 /* ptr */
4601                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),   /* [3] */
4602         };
4603
4604         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4605                                              strs, sizeof(strs)));
4606 }
4607
4608 static int probe_kern_array_mmap(void)
4609 {
4610         LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
4611         int fd;
4612
4613         fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
4614         return probe_fd(fd);
4615 }
4616
4617 static int probe_kern_exp_attach_type(void)
4618 {
4619         LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
4620         struct bpf_insn insns[] = {
4621                 BPF_MOV64_IMM(BPF_REG_0, 0),
4622                 BPF_EXIT_INSN(),
4623         };
4624         int fd, insn_cnt = ARRAY_SIZE(insns);
4625
4626         /* use any valid combination of program type and (optional)
4627          * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS,
4628          * which is zero) to see if the kernel supports the
4629          * expected_attach_type field of the BPF_PROG_LOAD command
4630          */
4631         fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
4632         return probe_fd(fd);
4633 }
4634
4635 static int probe_kern_probe_read_kernel(void)
4636 {
4637         struct bpf_insn insns[] = {
4638                 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
4639                 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
4640                 BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
4641                 BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
4642                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
4643                 BPF_EXIT_INSN(),
4644         };
4645         int fd, insn_cnt = ARRAY_SIZE(insns);
4646
4647         fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
4648         return probe_fd(fd);
4649 }
4650
4651 static int probe_prog_bind_map(void)
4652 {
4653         char *cp, errmsg[STRERR_BUFSIZE];
4654         struct bpf_insn insns[] = {
4655                 BPF_MOV64_IMM(BPF_REG_0, 0),
4656                 BPF_EXIT_INSN(),
4657         };
4658         int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
4659
4660         map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
4661         if (map < 0) {
4662                 ret = -errno;
4663                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4664                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4665                         __func__, cp, -ret);
4666                 return ret;
4667         }
4668
4669         prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4670         if (prog < 0) {
4671                 close(map);
4672                 return 0;
4673         }
4674
4675         ret = bpf_prog_bind_map(prog, map, NULL);
4676
4677         close(map);
4678         close(prog);
4679
4680         return ret >= 0;
4681 }
4682
4683 static int probe_module_btf(void)
4684 {
4685         static const char strs[] = "\0int";
4686         __u32 types[] = {
4687                 /* int */
4688                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4689         };
4690         struct bpf_btf_info info;
4691         __u32 len = sizeof(info);
4692         char name[16];
4693         int fd, err;
4694
4695         fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
4696         if (fd < 0)
4697                 return 0; /* BTF not supported at all */
4698
4699         memset(&info, 0, sizeof(info));
4700         info.name = ptr_to_u64(name);
4701         info.name_len = sizeof(name);
4702
4703         /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying a name pointer;
4704          * the kernel's module BTF support coincides with support for the
4705          * name/name_len fields in struct bpf_btf_info.
4706          */
4707         err = bpf_obj_get_info_by_fd(fd, &info, &len);
4708         close(fd);
4709         return !err;
4710 }
4711
4712 static int probe_perf_link(void)
4713 {
4714         struct bpf_insn insns[] = {
4715                 BPF_MOV64_IMM(BPF_REG_0, 0),
4716                 BPF_EXIT_INSN(),
4717         };
4718         int prog_fd, link_fd, err;
4719
4720         prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
4721                                 insns, ARRAY_SIZE(insns), NULL);
4722         if (prog_fd < 0)
4723                 return -errno;
4724
4725         /* use an invalid perf_event FD to get EBADF if BPF link is supported;
4726          * otherwise EINVAL should be returned
4727          */
4728         link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
4729         err = -errno; /* close() can clobber errno */
4730
4731         if (link_fd >= 0)
4732                 close(link_fd);
4733         close(prog_fd);
4734
4735         return link_fd < 0 && err == -EBADF;
4736 }
4737
4738 static int probe_kern_bpf_cookie(void)
4739 {
4740         struct bpf_insn insns[] = {
4741                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
4742                 BPF_EXIT_INSN(),
4743         };
4744         int ret, insn_cnt = ARRAY_SIZE(insns);
4745
4746         ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
4747         return probe_fd(ret);
4748 }
4749
4750 static int probe_kern_btf_enum64(void)
4751 {
4752         static const char strs[] = "\0enum64";
4753         __u32 types[] = {
4754                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
4755         };
4756
4757         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4758                                              strs, sizeof(strs)));
4759 }
4760
4761 static int probe_kern_syscall_wrapper(void);
4762
4763 enum kern_feature_result {
4764         FEAT_UNKNOWN = 0,
4765         FEAT_SUPPORTED = 1,
4766         FEAT_MISSING = 2,
4767 };
4768
4769 typedef int (*feature_probe_fn)(void);
4770
4771 static struct kern_feature_desc {
4772         const char *desc;
4773         feature_probe_fn probe;
4774         enum kern_feature_result res;
4775 } feature_probes[__FEAT_CNT] = {
4776         [FEAT_PROG_NAME] = {
4777                 "BPF program name", probe_kern_prog_name,
4778         },
4779         [FEAT_GLOBAL_DATA] = {
4780                 "global variables", probe_kern_global_data,
4781         },
4782         [FEAT_BTF] = {
4783                 "minimal BTF", probe_kern_btf,
4784         },
4785         [FEAT_BTF_FUNC] = {
4786                 "BTF functions", probe_kern_btf_func,
4787         },
4788         [FEAT_BTF_GLOBAL_FUNC] = {
4789                 "BTF global function", probe_kern_btf_func_global,
4790         },
4791         [FEAT_BTF_DATASEC] = {
4792                 "BTF data section and variable", probe_kern_btf_datasec,
4793         },
4794         [FEAT_ARRAY_MMAP] = {
4795                 "ARRAY map mmap()", probe_kern_array_mmap,
4796         },
4797         [FEAT_EXP_ATTACH_TYPE] = {
4798                 "BPF_PROG_LOAD expected_attach_type attribute",
4799                 probe_kern_exp_attach_type,
4800         },
4801         [FEAT_PROBE_READ_KERN] = {
4802                 "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
4803         },
4804         [FEAT_PROG_BIND_MAP] = {
4805                 "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
4806         },
4807         [FEAT_MODULE_BTF] = {
4808                 "module BTF support", probe_module_btf,
4809         },
4810         [FEAT_BTF_FLOAT] = {
4811                 "BTF_KIND_FLOAT support", probe_kern_btf_float,
4812         },
4813         [FEAT_PERF_LINK] = {
4814                 "BPF perf link support", probe_perf_link,
4815         },
4816         [FEAT_BTF_DECL_TAG] = {
4817                 "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
4818         },
4819         [FEAT_BTF_TYPE_TAG] = {
4820                 "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
4821         },
4822         [FEAT_MEMCG_ACCOUNT] = {
4823                 "memcg-based memory accounting", probe_memcg_account,
4824         },
4825         [FEAT_BPF_COOKIE] = {
4826                 "BPF cookie support", probe_kern_bpf_cookie,
4827         },
4828         [FEAT_BTF_ENUM64] = {
4829                 "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
4830         },
4831         [FEAT_SYSCALL_WRAPPER] = {
4832                 "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
4833         },
4834 };
4835
4836 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4837 {
4838         struct kern_feature_desc *feat = &feature_probes[feat_id];
4839         int ret;
4840
4841         if (obj && obj->gen_loader)
4842                 /* To generate a loader program, assume the latest kernel
4843                  * to avoid doing extra prog_load/map_create syscalls.
4844                  */
4845                 return true;
4846
4847         if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4848                 ret = feat->probe();
4849                 if (ret > 0) {
4850                         WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4851                 } else if (ret == 0) {
4852                         WRITE_ONCE(feat->res, FEAT_MISSING);
4853                 } else {
4854                         pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4855                         WRITE_ONCE(feat->res, FEAT_MISSING);
4856                 }
4857         }
4858
4859         return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4860 }
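/* Probe results are cached in the process-wide feature_probes[] table, so
 * repeated queries are cheap. A typical call site looks like (sketch,
 * mirroring the global data map handling below):
 *
 *	if (bpf_map__is_internal(map) &&
 *	    !kernel_supports(obj, FEAT_GLOBAL_DATA))
 *		map->autocreate = false;
 */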
4861
4862 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4863 {
4864         struct bpf_map_info map_info;
4865         char msg[STRERR_BUFSIZE];
4866         __u32 map_info_len = sizeof(map_info);
4867         int err;
4868
4869         memset(&map_info, 0, map_info_len);
4870         err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
4871         if (err && errno == EINVAL)
4872                 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4873         if (err) {
4874                 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4875                         libbpf_strerror_r(errno, msg, sizeof(msg)));
4876                 return false;
4877         }
4878
4879         return (map_info.type == map->def.type &&
4880                 map_info.key_size == map->def.key_size &&
4881                 map_info.value_size == map->def.value_size &&
4882                 map_info.max_entries == map->def.max_entries &&
4883                 map_info.map_flags == map->def.map_flags &&
4884                 map_info.map_extra == map->map_extra);
4885 }
4886
4887 static int
4888 bpf_object__reuse_map(struct bpf_map *map)
4889 {
4890         char *cp, errmsg[STRERR_BUFSIZE];
4891         int err, pin_fd;
4892
4893         pin_fd = bpf_obj_get(map->pin_path);
4894         if (pin_fd < 0) {
4895                 err = -errno;
4896                 if (err == -ENOENT) {
4897                         pr_debug("found no pinned map to reuse at '%s'\n",
4898                                  map->pin_path);
4899                         return 0;
4900                 }
4901
4902                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4903                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4904                         map->pin_path, cp);
4905                 return err;
4906         }
4907
4908         if (!map_is_reuse_compat(map, pin_fd)) {
4909                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4910                         map->pin_path);
4911                 close(pin_fd);
4912                 return -EINVAL;
4913         }
4914
4915         err = bpf_map__reuse_fd(map, pin_fd);
4916         close(pin_fd);
4917         if (err)
4918                 return err;
4919
4920         map->pinned = true;
4921         pr_debug("reused pinned map at '%s'\n", map->pin_path);
4922
4923         return 0;
4924 }
4925
4926 static int
4927 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4928 {
4929         enum libbpf_map_type map_type = map->libbpf_type;
4930         char *cp, errmsg[STRERR_BUFSIZE];
4931         int err, zero = 0;
4932
4933         if (obj->gen_loader) {
4934                 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
4935                                          map->mmaped, map->def.value_size);
4936                 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
4937                         bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
4938                 return 0;
4939         }
4940         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4941         if (err) {
4942                 err = -errno;
4943                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4944                 pr_warn("Error setting initial map(%s) contents: %s\n",
4945                         map->name, cp);
4946                 return err;
4947         }
4948
4949         /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4950         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4951                 err = bpf_map_freeze(map->fd);
4952                 if (err) {
4953                         err = -errno;
4954                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4955                         pr_warn("Error freezing map(%s) as read-only: %s\n",
4956                                 map->name, cp);
4957                         return err;
4958                 }
4959         }
4960         return 0;
4961 }
4962
4963 static void bpf_map__destroy(struct bpf_map *map);
4964
4965 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
4966 {
4967         LIBBPF_OPTS(bpf_map_create_opts, create_attr);
4968         struct bpf_map_def *def = &map->def;
4969         const char *map_name = NULL;
4970         int err = 0;
4971
4972         if (kernel_supports(obj, FEAT_PROG_NAME))
4973                 map_name = map->name;
4974         create_attr.map_ifindex = map->map_ifindex;
4975         create_attr.map_flags = def->map_flags;
4976         create_attr.numa_node = map->numa_node;
4977         create_attr.map_extra = map->map_extra;
4978
4979         if (bpf_map__is_struct_ops(map))
4980                 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
4981
4982         if (obj->btf && btf__fd(obj->btf) >= 0) {
4983                 create_attr.btf_fd = btf__fd(obj->btf);
4984                 create_attr.btf_key_type_id = map->btf_key_type_id;
4985                 create_attr.btf_value_type_id = map->btf_value_type_id;
4986         }
4987
4988         if (bpf_map_type__is_map_in_map(def->type)) {
4989                 if (map->inner_map) {
4990                         err = bpf_object__create_map(obj, map->inner_map, true);
4991                         if (err) {
4992                                 pr_warn("map '%s': failed to create inner map: %d\n",
4993                                         map->name, err);
4994                                 return err;
4995                         }
4996                         map->inner_map_fd = bpf_map__fd(map->inner_map);
4997                 }
4998                 if (map->inner_map_fd >= 0)
4999                         create_attr.inner_map_fd = map->inner_map_fd;
5000         }
5001
5002         switch (def->type) {
5003         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5004         case BPF_MAP_TYPE_CGROUP_ARRAY:
5005         case BPF_MAP_TYPE_STACK_TRACE:
5006         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5007         case BPF_MAP_TYPE_HASH_OF_MAPS:
5008         case BPF_MAP_TYPE_DEVMAP:
5009         case BPF_MAP_TYPE_DEVMAP_HASH:
5010         case BPF_MAP_TYPE_CPUMAP:
5011         case BPF_MAP_TYPE_XSKMAP:
5012         case BPF_MAP_TYPE_SOCKMAP:
5013         case BPF_MAP_TYPE_SOCKHASH:
5014         case BPF_MAP_TYPE_QUEUE:
5015         case BPF_MAP_TYPE_STACK:
5016                 create_attr.btf_fd = 0;
5017                 create_attr.btf_key_type_id = 0;
5018                 create_attr.btf_value_type_id = 0;
5019                 map->btf_key_type_id = 0;
5020                 map->btf_value_type_id = 0;
5021         default:
5022                 break;
5023         }
5024
5025         if (obj->gen_loader) {
5026                 bpf_gen__map_create(obj->gen_loader, def->type, map_name,
5027                                     def->key_size, def->value_size, def->max_entries,
5028                                     &create_attr, is_inner ? -1 : map - obj->maps);
5029                 /* Pretend to have a valid FD to pass various fd >= 0 checks.
5030                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
5031                  */
5032                 map->fd = 0;
5033         } else {
5034                 map->fd = bpf_map_create(def->type, map_name,
5035                                          def->key_size, def->value_size,
5036                                          def->max_entries, &create_attr);
5037         }
5038         if (map->fd < 0 && (create_attr.btf_key_type_id ||
5039                             create_attr.btf_value_type_id)) {
5040                 char *cp, errmsg[STRERR_BUFSIZE];
5041
5042                 err = -errno;
5043                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5044                 pr_warn("Error in bpf_map_create(%s):%s(%d). Retrying without BTF.\n",
5045                         map->name, cp, err);
5046                 create_attr.btf_fd = 0;
5047                 create_attr.btf_key_type_id = 0;
5048                 create_attr.btf_value_type_id = 0;
5049                 map->btf_key_type_id = 0;
5050                 map->btf_value_type_id = 0;
5051                 map->fd = bpf_map_create(def->type, map_name,
5052                                          def->key_size, def->value_size,
5053                                          def->max_entries, &create_attr);
5054         }
5055
5056         err = map->fd < 0 ? -errno : 0;
5057
5058         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5059                 if (obj->gen_loader)
5060                         map->inner_map->fd = -1;
5061                 bpf_map__destroy(map->inner_map);
5062                 zfree(&map->inner_map);
5063         }
5064
5065         return err;
5066 }
5067
5068 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5069 {
5070         const struct bpf_map *targ_map;
5071         unsigned int i;
5072         int fd, err = 0;
5073
5074         for (i = 0; i < map->init_slots_sz; i++) {
5075                 if (!map->init_slots[i])
5076                         continue;
5077
5078                 targ_map = map->init_slots[i];
5079                 fd = bpf_map__fd(targ_map);
5080
5081                 if (obj->gen_loader) {
5082                         bpf_gen__populate_outer_map(obj->gen_loader,
5083                                                     map - obj->maps, i,
5084                                                     targ_map - obj->maps);
5085                 } else {
5086                         err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5087                 }
5088                 if (err) {
5089                         err = -errno;
5090                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
5091                                 map->name, i, targ_map->name, fd, err);
5092                         return err;
5093                 }
5094                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5095                          map->name, i, targ_map->name, fd);
5096         }
5097
5098         zfree(&map->init_slots);
5099         map->init_slots_sz = 0;
5100
5101         return 0;
5102 }
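/* init_slots come from declaratively initialized outer maps, e.g. on the
 * BPF side (sketch, all names hypothetical):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 2);
 *		__array(values, struct inner_map_t);
 *	} outer SEC(".maps") = {
 *		.values = { [0] = &inner_a, [1] = &inner_b },
 *	};
 *
 * Each non-NULL slot is resolved here to the already-created inner map's FD.
 */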
5103
5104 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5105 {
5106         const struct bpf_program *targ_prog;
5107         unsigned int i;
5108         int fd, err;
5109
5110         if (obj->gen_loader)
5111                 return -ENOTSUP;
5112
5113         for (i = 0; i < map->init_slots_sz; i++) {
5114                 if (!map->init_slots[i])
5115                         continue;
5116
5117                 targ_prog = map->init_slots[i];
5118                 fd = bpf_program__fd(targ_prog);
5119
5120                 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5121                 if (err) {
5122                         err = -errno;
5123                         pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
5124                                 map->name, i, targ_prog->name, fd, err);
5125                         return err;
5126                 }
5127                 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5128                          map->name, i, targ_prog->name, fd);
5129         }
5130
5131         zfree(&map->init_slots);
5132         map->init_slots_sz = 0;
5133
5134         return 0;
5135 }
5136
5137 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5138 {
5139         struct bpf_map *map;
5140         int i, err;
5141
5142         for (i = 0; i < obj->nr_maps; i++) {
5143                 map = &obj->maps[i];
5144
5145                 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5146                         continue;
5147
5148                 err = init_prog_array_slots(obj, map);
5149                 if (err < 0) {
5150                         zclose(map->fd);
5151                         return err;
5152                 }
5153         }
5154         return 0;
5155 }
5156
5157 static int map_set_def_max_entries(struct bpf_map *map)
5158 {
5159         if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5160                 int nr_cpus;
5161
5162                 nr_cpus = libbpf_num_possible_cpus();
5163                 if (nr_cpus < 0) {
5164                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5165                                 map->name, nr_cpus);
5166                         return nr_cpus;
5167                 }
5168                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5169                 map->def.max_entries = nr_cpus;
5170         }
5171
5172         return 0;
5173 }
5174
5175 static int
5176 bpf_object__create_maps(struct bpf_object *obj)
5177 {
5178         struct bpf_map *map;
5179         char *cp, errmsg[STRERR_BUFSIZE];
5180         unsigned int i, j;
5181         int err;
5182         bool retried;
5183
5184         for (i = 0; i < obj->nr_maps; i++) {
5185                 map = &obj->maps[i];
5186
5187                 /* To support old kernels, we skip creating global data maps
5188                  * (.rodata, .data, .kconfig, etc); later on, during program
5189                  * loading, if we detect that at least one of the to-be-loaded
5190                  * programs is referencing any global data map, we'll error
5191                  * out with program name and relocation index logged.
5192                  * This approach accommodates Clang emitting unnecessary
5193                  * .rodata.str1.1 sections for string literals, and it also
5194                  * allows CO-RE applications to use global variables in some
5195                  * BPF programs but not others.
5196                  * If those global variable-using programs are not loaded at
5197                  * runtime due to bpf_program__set_autoload(prog, false),
5198                  * bpf_object loading will succeed just fine even on old
5199                  * kernels.
5200                  */
5201                 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5202                         map->autocreate = false;
5203
5204                 if (!map->autocreate) {
5205                         pr_debug("map '%s': skipped auto-creating...\n", map->name);
5206                         continue;
5207                 }
5208
5209                 err = map_set_def_max_entries(map);
5210                 if (err)
5211                         goto err_out;
5212
5213                 retried = false;
5214 retry:
5215                 if (map->pin_path) {
5216                         err = bpf_object__reuse_map(map);
5217                         if (err) {
5218                                 pr_warn("map '%s': error reusing pinned map\n",
5219                                         map->name);
5220                                 goto err_out;
5221                         }
5222                         if (retried && map->fd < 0) {
5223                                 pr_warn("map '%s': cannot find pinned map\n",
5224                                         map->name);
5225                                 err = -ENOENT;
5226                                 goto err_out;
5227                         }
5228                 }
5229
5230                 if (map->fd >= 0) {
5231                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5232                                  map->name, map->fd);
5233                 } else {
5234                         err = bpf_object__create_map(obj, map, false);
5235                         if (err)
5236                                 goto err_out;
5237
5238                         pr_debug("map '%s': created successfully, fd=%d\n",
5239                                  map->name, map->fd);
5240
5241                         if (bpf_map__is_internal(map)) {
5242                                 err = bpf_object__populate_internal_map(obj, map);
5243                                 if (err < 0) {
5244                                         zclose(map->fd);
5245                                         goto err_out;
5246                                 }
5247                         }
5248
5249                         if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5250                                 err = init_map_in_map_slots(obj, map);
5251                                 if (err < 0) {
5252                                         zclose(map->fd);
5253                                         goto err_out;
5254                                 }
5255                         }
5256                 }
5257
5258                 if (map->pin_path && !map->pinned) {
5259                         err = bpf_map__pin(map, NULL);
5260                         if (err) {
5261                                 zclose(map->fd);
5262                                 if (!retried && err == -EEXIST) {
5263                                         retried = true;
5264                                         goto retry;
5265                                 }
5266                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
5267                                         map->name, map->pin_path, err);
5268                                 goto err_out;
5269                         }
5270                 }
5271         }
5272
5273         return 0;
5274
5275 err_out:
5276         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5277         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
5278         pr_perm_msg(err);
5279         for (j = 0; j < i; j++)
5280                 zclose(obj->maps[j].fd);
5281         return err;
5282 }
5283
5284 static bool bpf_core_is_flavor_sep(const char *s)
5285 {
5286         /* check X___Y name pattern, where X and Y are not underscores */
5287         return s[0] != '_' &&                                 /* X */
5288                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
5289                s[4] != '_';                                   /* Y */
5290 }
5291
5292 /* Given 'some_struct_name___with_flavor', return the length of the name
5293  * prefix before the last triple underscore. The struct name part after the
5294  * last triple underscore is ignored by BPF CO-RE relocation matching.
5295  */
5296 size_t bpf_core_essential_name_len(const char *name)
5297 {
5298         size_t n = strlen(name);
5299         int i;
5300
5301         for (i = n - 5; i >= 0; i--) {
5302                 if (bpf_core_is_flavor_sep(name + i))
5303                         return i + 1;
5304         }
5305         return n;
5306 }
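/* Worked example (illustrative): bpf_core_essential_name_len("task_struct___v2")
 * returns 11, the length of "task_struct"; for a name with no flavor suffix,
 * the full string length is returned. Thus task_struct___v2 and
 * task_struct___v3 both match candidates named task_struct.
 */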
5307
5308 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5309 {
5310         if (!cands)
5311                 return;
5312
5313         free(cands->cands);
5314         free(cands);
5315 }
5316
5317 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5318                        size_t local_essent_len,
5319                        const struct btf *targ_btf,
5320                        const char *targ_btf_name,
5321                        int targ_start_id,
5322                        struct bpf_core_cand_list *cands)
5323 {
5324         struct bpf_core_cand *new_cands, *cand;
5325         const struct btf_type *t, *local_t;
5326         const char *targ_name, *local_name;
5327         size_t targ_essent_len;
5328         int n, i;
5329
5330         local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5331         local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5332
5333         n = btf__type_cnt(targ_btf);
5334         for (i = targ_start_id; i < n; i++) {
5335                 t = btf__type_by_id(targ_btf, i);
5336                 if (!btf_kind_core_compat(t, local_t))
5337                         continue;
5338
5339                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5340                 if (str_is_empty(targ_name))
5341                         continue;
5342
5343                 targ_essent_len = bpf_core_essential_name_len(targ_name);
5344                 if (targ_essent_len != local_essent_len)
5345                         continue;
5346
5347                 if (strncmp(local_name, targ_name, local_essent_len) != 0)
5348                         continue;
5349
5350                 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5351                          local_cand->id, btf_kind_str(local_t),
5352                          local_name, i, btf_kind_str(t), targ_name,
5353                          targ_btf_name);
5354                 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5355                                               sizeof(*cands->cands));
5356                 if (!new_cands)
5357                         return -ENOMEM;
5358
5359                 cand = &new_cands[cands->len];
5360                 cand->btf = targ_btf;
5361                 cand->id = i;
5362
5363                 cands->cands = new_cands;
5364                 cands->len++;
5365         }
5366         return 0;
5367 }
5368
5369 static int load_module_btfs(struct bpf_object *obj)
5370 {
5371         struct bpf_btf_info info;
5372         struct module_btf *mod_btf;
5373         struct btf *btf;
5374         char name[64];
5375         __u32 id = 0, len;
5376         int err, fd;
5377
5378         if (obj->btf_modules_loaded)
5379                 return 0;
5380
5381         if (obj->gen_loader)
5382                 return 0;
5383
5384         /* don't do this again, even if we find no module BTFs */
5385         obj->btf_modules_loaded = true;
5386
5387         /* kernel too old to support module BTFs */
5388         if (!kernel_supports(obj, FEAT_MODULE_BTF))
5389                 return 0;
5390
5391         while (true) {
5392                 err = bpf_btf_get_next_id(id, &id);
5393                 if (err && errno == ENOENT)
5394                         return 0;
5395                 if (err) {
5396                         err = -errno;
5397                         pr_warn("failed to iterate BTF objects: %d\n", err);
5398                         return err;
5399                 }
5400
5401                 fd = bpf_btf_get_fd_by_id(id);
5402                 if (fd < 0) {
5403                         if (errno == ENOENT)
5404                                 continue; /* expected race: BTF was unloaded */
5405                         err = -errno;
5406                         pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5407                         return err;
5408                 }
5409
5410                 len = sizeof(info);
5411                 memset(&info, 0, sizeof(info));
5412                 info.name = ptr_to_u64(name);
5413                 info.name_len = sizeof(name);
5414
5415                 err = bpf_obj_get_info_by_fd(fd, &info, &len);
5416                 if (err) {
5417                         err = -errno;
5418                         pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5419                         goto err_out;
5420                 }
5421
5422                 /* ignore non-module BTFs */
5423                 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5424                         close(fd);
5425                         continue;
5426                 }
5427
5428                 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5429                 err = libbpf_get_error(btf);
5430                 if (err) {
5431                         pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5432                                 name, id, err);
5433                         goto err_out;
5434                 }
5435
5436                 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5437                                         sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5438                 if (err)
5439                         goto err_out;
5440
5441                 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5442
5443                 mod_btf->btf = btf;
5444                 mod_btf->id = id;
5445                 mod_btf->fd = fd;
5446                 mod_btf->name = strdup(name);
5447                 if (!mod_btf->name) {
5448                         err = -ENOMEM;
5449                         goto err_out;
5450                 }
5451                 continue;
5452
5453 err_out:
5454                 close(fd);
5455                 return err;
5456         }
5457
5458         return 0;
5459 }
5460
5461 static struct bpf_core_cand_list *
5462 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5463 {
5464         struct bpf_core_cand local_cand = {};
5465         struct bpf_core_cand_list *cands;
5466         const struct btf *main_btf;
5467         const struct btf_type *local_t;
5468         const char *local_name;
5469         size_t local_essent_len;
5470         int err, i;
5471
5472         local_cand.btf = local_btf;
5473         local_cand.id = local_type_id;
5474         local_t = btf__type_by_id(local_btf, local_type_id);
5475         if (!local_t)
5476                 return ERR_PTR(-EINVAL);
5477
5478         local_name = btf__name_by_offset(local_btf, local_t->name_off);
5479         if (str_is_empty(local_name))
5480                 return ERR_PTR(-EINVAL);
5481         local_essent_len = bpf_core_essential_name_len(local_name);
5482
5483         cands = calloc(1, sizeof(*cands));
5484         if (!cands)
5485                 return ERR_PTR(-ENOMEM);
5486
5487         /* Attempt to find target candidates in vmlinux BTF first */
5488         main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5489         err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5490         if (err)
5491                 goto err_out;
5492
5493         /* if vmlinux BTF has any candidate, don't go for module BTFs */
5494         if (cands->len)
5495                 return cands;
5496
5497         /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5498         if (obj->btf_vmlinux_override)
5499                 return cands;
5500
5501         /* now look through module BTFs, still trying to find candidates */
5502         err = load_module_btfs(obj);
5503         if (err)
5504                 goto err_out;
5505
5506         for (i = 0; i < obj->btf_module_cnt; i++) {
5507                 err = bpf_core_add_cands(&local_cand, local_essent_len,
5508                                          obj->btf_modules[i].btf,
5509                                          obj->btf_modules[i].name,
5510                                          btf__type_cnt(obj->btf_vmlinux),
5511                                          cands);
5512                 if (err)
5513                         goto err_out;
5514         }
5515
5516         return cands;
5517 err_out:
5518         bpf_core_free_cands(cands);
5519         return ERR_PTR(err);
5520 }
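
/* Editorial illustration (not part of the original source): the candidate
 * lookup above matches on the "essential" type name, i.e. the name with any
 * three-underscore "flavor" suffix stripped. For instance, an assumed local
 * type
 *
 *	struct task_struct___my_flavor { ... };
 *
 * has essential name "task_struct" (length 11), so it is matched against
 * vmlinux/module candidates named "task_struct"; the suffix merely
 * disambiguates multiple local definitions of the same kernel type.
 */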
5521
5522 /* Check local and target types for compatibility. This check is used for
5523  * type-based CO-RE relocations and follows slightly different rules from
5524  * field-based relocations. This function assumes that root types were already
5525  * checked for name match. Beyond that initial root-level name check, names
5526  * are completely ignored. Compatibility rules are as follows:
5527  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5528  *     kind should match for local and target types (i.e., STRUCT is not
5529  *     compatible with UNION);
5530  *   - for ENUMs, the size is ignored;
5531  *   - for INT, size and signedness are ignored;
5532  *   - for ARRAY, dimensionality is ignored, element types are checked for
5533  *     compatibility recursively;
5534  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
5535  *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5536  *   - FUNC_PROTOs are compatible if they have compatible signature: same
5537  *     number of input args and compatible return and argument types.
5538  * These rules are not set in stone and will probably be adjusted as we gain
5539  * more experience with BPF CO-RE relocations; see the example sketch below.
5540  */
5541 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5542                               const struct btf *targ_btf, __u32 targ_id)
5543 {
5544         return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5545 }
5546
5547 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5548                          const struct btf *targ_btf, __u32 targ_id)
5549 {
5550         return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5551 }
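
/* Editorial sketch of the compatibility rules above (illustrative types, not
 * from the source): the following local and target prototypes are compatible,
 * because CONST is ignored, INT size/signedness is ignored, and PTRs are
 * compatible when their pointee types are:
 *
 *	local:  int  f(const struct s *p, unsigned short x);
 *	target: long f(struct s *p, int x);
 *
 * A local STRUCT against a target UNION of the same name would not be
 * compatible, since the kind must match.
 */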
5552
5553 static size_t bpf_core_hash_fn(const void *key, void *ctx)
5554 {
5555         return (size_t)key;
5556 }
5557
5558 static bool bpf_core_equal_fn(const void *k1, const void *k2, void *ctx)
5559 {
5560         return k1 == k2;
5561 }
5562
5563 static void *u32_as_hash_key(__u32 x)
5564 {
5565         return (void *)(uintptr_t)x;
5566 }
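
/* Usage sketch (editorial, mirroring bpf_core_resolve_relo() below): these
 * helpers let the candidate cache key entries directly by the local BTF type
 * ID, with no separate key allocation:
 *
 *	struct hashmap *cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
 *	const void *key = u32_as_hash_key(type_id);
 *
 *	err = hashmap__set(cache, key, cands, NULL, NULL);
 *	found = hashmap__find(cache, key, (void **)&cands);
 */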
5567
5568 static int record_relo_core(struct bpf_program *prog,
5569                             const struct bpf_core_relo *core_relo, int insn_idx)
5570 {
5571         struct reloc_desc *relos, *relo;
5572
5573         relos = libbpf_reallocarray(prog->reloc_desc,
5574                                     prog->nr_reloc + 1, sizeof(*relos));
5575         if (!relos)
5576                 return -ENOMEM;
5577         relo = &relos[prog->nr_reloc];
5578         relo->type = RELO_CORE;
5579         relo->insn_idx = insn_idx;
5580         relo->core_relo = core_relo;
5581         prog->reloc_desc = relos;
5582         prog->nr_reloc++;
5583         return 0;
5584 }
5585
5586 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5587 {
5588         struct reloc_desc *relo;
5589         int i;
5590
5591         for (i = 0; i < prog->nr_reloc; i++) {
5592                 relo = &prog->reloc_desc[i];
5593                 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5594                         continue;
5595
5596                 return relo->core_relo;
5597         }
5598
5599         return NULL;
5600 }
5601
5602 static int bpf_core_resolve_relo(struct bpf_program *prog,
5603                                  const struct bpf_core_relo *relo,
5604                                  int relo_idx,
5605                                  const struct btf *local_btf,
5606                                  struct hashmap *cand_cache,
5607                                  struct bpf_core_relo_res *targ_res)
5608 {
5609         struct bpf_core_spec specs_scratch[3] = {};
5610         const void *type_key = u32_as_hash_key(relo->type_id);
5611         struct bpf_core_cand_list *cands = NULL;
5612         const char *prog_name = prog->name;
5613         const struct btf_type *local_type;
5614         const char *local_name;
5615         __u32 local_id = relo->type_id;
5616         int err;
5617
5618         local_type = btf__type_by_id(local_btf, local_id);
5619         if (!local_type)
5620                 return -EINVAL;
5621
5622         local_name = btf__name_by_offset(local_btf, local_type->name_off);
5623         if (!local_name)
5624                 return -EINVAL;
5625
5626         if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5627             !hashmap__find(cand_cache, type_key, (void **)&cands)) {
5628                 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5629                 if (IS_ERR(cands)) {
5630                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5631                                 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5632                                 local_name, PTR_ERR(cands));
5633                         return PTR_ERR(cands);
5634                 }
5635                 err = hashmap__set(cand_cache, type_key, cands, NULL, NULL);
5636                 if (err) {
5637                         bpf_core_free_cands(cands);
5638                         return err;
5639                 }
5640         }
5641
5642         return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5643                                        targ_res);
5644 }
5645
5646 static int
5647 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5648 {
5649         const struct btf_ext_info_sec *sec;
5650         struct bpf_core_relo_res targ_res;
5651         const struct bpf_core_relo *rec;
5652         const struct btf_ext_info *seg;
5653         struct hashmap_entry *entry;
5654         struct hashmap *cand_cache = NULL;
5655         struct bpf_program *prog;
5656         struct bpf_insn *insn;
5657         const char *sec_name;
5658         int i, err = 0, insn_idx, sec_idx, sec_num;
5659
5660         if (obj->btf_ext->core_relo_info.len == 0)
5661                 return 0;
5662
5663         if (targ_btf_path) {
5664                 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5665                 err = libbpf_get_error(obj->btf_vmlinux_override);
5666                 if (err) {
5667                         pr_warn("failed to parse target BTF: %d\n", err);
5668                         return err;
5669                 }
5670         }
5671
5672         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5673         if (IS_ERR(cand_cache)) {
5674                 err = PTR_ERR(cand_cache);
5675                 goto out;
5676         }
5677
5678         seg = &obj->btf_ext->core_relo_info;
5679         sec_num = 0;
5680         for_each_btf_ext_sec(seg, sec) {
5681                 sec_idx = seg->sec_idxs[sec_num];
5682                 sec_num++;
5683
5684                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5685                 if (str_is_empty(sec_name)) {
5686                         err = -EINVAL;
5687                         goto out;
5688                 }
5689
5690                 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5691
5692                 for_each_btf_ext_rec(seg, sec, i, rec) {
5693                         if (rec->insn_off % BPF_INSN_SZ)
5694                                 return -EINVAL;
5695                         insn_idx = rec->insn_off / BPF_INSN_SZ;
5696                         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5697                         if (!prog) {
5698                                 /* When a __weak subprog is "overridden" by another instance
5699                                  * of the subprog from a different object file, the linker
5700                                  * still appends all the .BTF.ext info that used to belong
5701                                  * to the eliminated subprogram.
5702                                  * This is similar to what the x86-64 linker does for
5703                                  * relocations. So just ignore such relocations, the same way
5704                                  * we ignore subprog instructions when discovering subprograms.
5705                                  */
5706                                 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5707                                          sec_name, i, insn_idx);
5708                                 continue;
5709                         }
5710                         /* no need to apply CO-RE relocation if the program is
5711                          * not going to be loaded
5712                          */
5713                         if (!prog->autoload)
5714                                 continue;
5715
5716                         /* adjust insn_idx from section frame of reference to the local
5717                          * program's frame of reference; (sub-)program code is not yet
5718                          * relocated, so it's enough to just subtract in-section offset
5719                          */
5720                         insn_idx = insn_idx - prog->sec_insn_off;
5721                         if (insn_idx >= prog->insns_cnt)
5722                                 return -EINVAL;
5723                         insn = &prog->insns[insn_idx];
5724
5725                         err = record_relo_core(prog, rec, insn_idx);
5726                         if (err) {
5727                                 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5728                                         prog->name, i, err);
5729                                 goto out;
5730                         }
5731
5732                         if (prog->obj->gen_loader)
5733                                 continue;
5734
5735                         err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5736                         if (err) {
5737                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5738                                         prog->name, i, err);
5739                                 goto out;
5740                         }
5741
5742                         err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5743                         if (err) {
5744                                 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5745                                         prog->name, i, insn_idx, err);
5746                                 goto out;
5747                         }
5748                 }
5749         }
5750
5751 out:
5752         /* obj->btf_vmlinux and module BTFs are freed after object load */
5753         btf__free(obj->btf_vmlinux_override);
5754         obj->btf_vmlinux_override = NULL;
5755
5756         if (!IS_ERR_OR_NULL(cand_cache)) {
5757                 hashmap__for_each_entry(cand_cache, entry, i) {
5758                         bpf_core_free_cands(entry->value);
5759                 }
5760                 hashmap__free(cand_cache);
5761         }
5762         return err;
5763 }
5764
5765 /* base map-load ldimm64 poison constant, also used by the log fixup logic */
5766 #define MAP_LDIMM64_POISON_BASE 2001000000
5767 #define MAP_LDIMM64_POISON_PFX "200100"
5768
5769 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
5770                                int insn_idx, struct bpf_insn *insn,
5771                                int map_idx, const struct bpf_map *map)
5772 {
5773         int i;
5774
5775         pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
5776                  prog->name, relo_idx, insn_idx, map_idx, map->name);
5777
5778         /* we turn a single ldimm64 into two identical invalid calls */
5779         for (i = 0; i < 2; i++) {
5780                 insn->code = BPF_JMP | BPF_CALL;
5781                 insn->dst_reg = 0;
5782                 insn->src_reg = 0;
5783                 insn->off = 0;
5784                 /* if this instruction is reachable (not dead code), the
5785                  * verifier will complain with something like:
5786                  * invalid func unknown#2001000123
5787                  * where the lower 123 is the map index into the obj->maps[] array
5788                  */
5789                 insn->imm = MAP_LDIMM64_POISON_BASE + map_idx;
5790
5791                 insn++;
5792         }
5793 }
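
/* Editorial example of the scheme above, assuming map_idx 3: both halves of
 * the ldimm64 become calls with imm = 2001000000 + 3, so a reachable poisoned
 * instruction makes the verifier report
 *
 *	invalid func unknown#2001000003
 *
 * and the log fixup logic can recognize such errors by matching the
 * MAP_LDIMM64_POISON_PFX ("200100") prefix of the function number.
 */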
5794
5795 /* Relocate data references within program code:
5796  *  - map references;
5797  *  - global variable references;
5798  *  - extern references.
5799  */
5800 static int
5801 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5802 {
5803         int i;
5804
5805         for (i = 0; i < prog->nr_reloc; i++) {
5806                 struct reloc_desc *relo = &prog->reloc_desc[i];
5807                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5808                 const struct bpf_map *map;
5809                 struct extern_desc *ext;
5810
5811                 switch (relo->type) {
5812                 case RELO_LD64:
5813                         map = &obj->maps[relo->map_idx];
5814                         if (obj->gen_loader) {
5815                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
5816                                 insn[0].imm = relo->map_idx;
5817                         } else if (map->autocreate) {
5818                                 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5819                                 insn[0].imm = map->fd;
5820                         } else {
5821                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5822                                                    relo->map_idx, map);
5823                         }
5824                         break;
5825                 case RELO_DATA:
5826                         map = &obj->maps[relo->map_idx];
5827                         insn[1].imm = insn[0].imm + relo->sym_off;
5828                         if (obj->gen_loader) {
5829                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5830                                 insn[0].imm = relo->map_idx;
5831                         } else if (map->autocreate) {
5832                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5833                                 insn[0].imm = map->fd;
5834                         } else {
5835                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5836                                                    relo->map_idx, map);
5837                         }
5838                         break;
5839                 case RELO_EXTERN_VAR:
5840                         ext = &obj->externs[relo->sym_off];
5841                         if (ext->type == EXT_KCFG) {
5842                                 if (obj->gen_loader) {
5843                                         insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5844                                         insn[0].imm = obj->kconfig_map_idx;
5845                                 } else {
5846                                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5847                                         insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5848                                 }
5849                                 insn[1].imm = ext->kcfg.data_off;
5850                         } else /* EXT_KSYM */ {
5851                                 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
5852                                         insn[0].src_reg = BPF_PSEUDO_BTF_ID;
5853                                         insn[0].imm = ext->ksym.kernel_btf_id;
5854                                         insn[1].imm = ext->ksym.kernel_btf_obj_fd;
5855                                 } else { /* typeless ksyms or unresolved typed ksyms */
5856                                         insn[0].imm = (__u32)ext->ksym.addr;
5857                                         insn[1].imm = ext->ksym.addr >> 32;
5858                                 }
5859                         }
5860                         break;
5861                 case RELO_EXTERN_FUNC:
5862                         ext = &obj->externs[relo->sym_off];
5863                         insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
5864                         if (ext->is_set) {
5865                                 insn[0].imm = ext->ksym.kernel_btf_id;
5866                                 insn[0].off = ext->ksym.btf_fd_idx;
5867                         } else { /* unresolved weak kfunc */
5868                                 insn[0].imm = 0;
5869                                 insn[0].off = 0;
5870                         }
5871                         break;
5872                 case RELO_SUBPROG_ADDR:
5873                         if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
5874                                 pr_warn("prog '%s': relo #%d: bad insn\n",
5875                                         prog->name, i);
5876                                 return -EINVAL;
5877                         }
5878                         /* handled already */
5879                         break;
5880                 case RELO_CALL:
5881                         /* handled already */
5882                         break;
5883                 case RELO_CORE:
5884                         /* will be handled by bpf_program_record_relos() */
5885                         break;
5886                 default:
5887                         pr_warn("prog '%s': relo #%d: bad relo type %d\n",
5888                                 prog->name, i, relo->type);
5889                         return -EINVAL;
5890                 }
5891         }
5892
5893         return 0;
5894 }
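
/* Editorial note on the insn[0]/insn[1] patching above: ldimm64 is a
 * double-wide instruction, so each 64-bit immediate spans two consecutive
 * struct bpf_insn slots. E.g., for a typeless ksym at an assumed address
 * 0xffffffff81234568:
 *
 *	insn[0].imm = 0x81234568;	// low 32 bits
 *	insn[1].imm = 0xffffffff;	// high 32 bits
 */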
5895
5896 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
5897                                     const struct bpf_program *prog,
5898                                     const struct btf_ext_info *ext_info,
5899                                     void **prog_info, __u32 *prog_rec_cnt,
5900                                     __u32 *prog_rec_sz)
5901 {
5902         void *copy_start = NULL, *copy_end = NULL;
5903         void *rec, *rec_end, *new_prog_info;
5904         const struct btf_ext_info_sec *sec;
5905         size_t old_sz, new_sz;
5906         int i, sec_num, sec_idx, off_adj;
5907
5908         sec_num = 0;
5909         for_each_btf_ext_sec(ext_info, sec) {
5910                 sec_idx = ext_info->sec_idxs[sec_num];
5911                 sec_num++;
5912                 if (prog->sec_idx != sec_idx)
5913                         continue;
5914
5915                 for_each_btf_ext_rec(ext_info, sec, i, rec) {
5916                         __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
5917
5918                         if (insn_off < prog->sec_insn_off)
5919                                 continue;
5920                         if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
5921                                 break;
5922
5923                         if (!copy_start)
5924                                 copy_start = rec;
5925                         copy_end = rec + ext_info->rec_size;
5926                 }
5927
5928                 if (!copy_start)
5929                         return -ENOENT;
5930
5931                 /* append func/line info of a given (sub-)program to the main
5932                  * program func/line info
5933                  */
5934                 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
5935                 new_sz = old_sz + (copy_end - copy_start);
5936                 new_prog_info = realloc(*prog_info, new_sz);
5937                 if (!new_prog_info)
5938                         return -ENOMEM;
5939                 *prog_info = new_prog_info;
5940                 *prog_rec_cnt = new_sz / ext_info->rec_size;
5941                 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
5942
5943                 /* Kernel instruction offsets are in units of 8-byte
5944                  * instructions, while .BTF.ext instruction offsets generated
5945                  * by Clang are in units of bytes. So convert Clang offsets
5946                  * into kernel offsets and adjust offset according to program
5947                  * relocated position.
5948                  */
5949                 off_adj = prog->sub_insn_off - prog->sec_insn_off;
5950                 rec = new_prog_info + old_sz;
5951                 rec_end = new_prog_info + new_sz;
5952                 for (; rec < rec_end; rec += ext_info->rec_size) {
5953                         __u32 *insn_off = rec;
5954
5955                         *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
5956                 }
5957                 *prog_rec_sz = ext_info->rec_size;
5958                 return 0;
5959         }
5960
5961         return -ENOENT;
5962 }
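
/* Worked example (editorial, with assumed numbers): take a subprog that
 * starts at section instruction #100 and was appended at main-prog
 * instruction #500, so off_adj = 500 - 100 = 400. A .BTF.ext record with a
 * Clang byte offset of 824 within the section is rewritten as
 *
 *	*insn_off = 824 / 8 + 400 = 503
 *
 * i.e. instruction #503 of the final, relocated main program.
 */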
5963
5964 static int
5965 reloc_prog_func_and_line_info(const struct bpf_object *obj,
5966                               struct bpf_program *main_prog,
5967                               const struct bpf_program *prog)
5968 {
5969         int err;
5970
5971         /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
5972          * support func/line info
5973          */
5974         if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
5975                 return 0;
5976
5977         /* only attempt func info relocation if main program's func_info
5978          * relocation was successful
5979          */
5980         if (main_prog != prog && !main_prog->func_info)
5981                 goto line_info;
5982
5983         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
5984                                        &main_prog->func_info,
5985                                        &main_prog->func_info_cnt,
5986                                        &main_prog->func_info_rec_size);
5987         if (err) {
5988                 if (err != -ENOENT) {
5989                         pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
5990                                 prog->name, err);
5991                         return err;
5992                 }
5993                 if (main_prog->func_info) {
5994                         /*
5995                          * Some info has already been found, but the last
5996                          * .BTF.ext reloc had a problem. We must error out.
5997                          */
5998                         pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
5999                         return err;
6000                 }
6001                 /* Failed to load even the very first info; ignore the rest. */
6002                 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6003                         prog->name);
6004         }
6005
6006 line_info:
6007         /* don't relocate line info if main program's relocation failed */
6008         if (main_prog != prog && !main_prog->line_info)
6009                 return 0;
6010
6011         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6012                                        &main_prog->line_info,
6013                                        &main_prog->line_info_cnt,
6014                                        &main_prog->line_info_rec_size);
6015         if (err) {
6016                 if (err != -ENOENT) {
6017                         pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6018                                 prog->name, err);
6019                         return err;
6020                 }
6021                 if (main_prog->line_info) {
6022                         /*
6023                          * Some info has already been found, but the last
6024                          * .BTF.ext reloc had a problem. We must error out.
6025                          */
6026                         pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6027                         return err;
6028                 }
6029                 /* Failed to load even the very first info; ignore the rest. */
6030                 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6031                         prog->name);
6032         }
6033         return 0;
6034 }
6035
6036 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6037 {
6038         size_t insn_idx = *(const size_t *)key;
6039         const struct reloc_desc *relo = elem;
6040
6041         if (insn_idx == relo->insn_idx)
6042                 return 0;
6043         return insn_idx < relo->insn_idx ? -1 : 1;
6044 }
6045
6046 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6047 {
6048         if (!prog->nr_reloc)
6049                 return NULL;
6050         return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6051                        sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6052 }
6053
6054 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6055 {
6056         int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6057         struct reloc_desc *relos;
6058         int i;
6059
6060         if (main_prog == subprog)
6061                 return 0;
6062         relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6063         if (!relos)
6064                 return -ENOMEM;
6065         if (subprog->nr_reloc)
6066                 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6067                        sizeof(*relos) * subprog->nr_reloc);
6068
6069         for (i = main_prog->nr_reloc; i < new_cnt; i++)
6070                 relos[i].insn_idx += subprog->sub_insn_off;
6071         /* After insn_idx adjustment the 'relos' array is still sorted
6072          * by insn_idx and doesn't break bsearch.
6073          */
6074         main_prog->reloc_desc = relos;
6075         main_prog->nr_reloc = new_cnt;
6076         return 0;
6077 }
6078
6079 static int
6080 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6081                        struct bpf_program *prog)
6082 {
6083         size_t sub_insn_idx, insn_idx, new_cnt;
6084         struct bpf_program *subprog;
6085         struct bpf_insn *insns, *insn;
6086         struct reloc_desc *relo;
6087         int err;
6088
6089         err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6090         if (err)
6091                 return err;
6092
6093         for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6094                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6095                 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6096                         continue;
6097
6098                 relo = find_prog_insn_relo(prog, insn_idx);
6099                 if (relo && relo->type == RELO_EXTERN_FUNC)
6100                         /* kfunc relocations will be handled later
6101                          * in bpf_object__relocate_data()
6102                          */
6103                         continue;
6104                 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6105                         pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6106                                 prog->name, insn_idx, relo->type);
6107                         return -LIBBPF_ERRNO__RELOC;
6108                 }
6109                 if (relo) {
6110                         /* sub-program instruction index is a combination of
6111                          * an offset of a symbol pointed to by relocation and
6112                          * call instruction's imm field; for global functions,
6113                          * call always has imm = -1, but for static functions
6114                          * relocation is against STT_SECTION and insn->imm
6115                          * points to a start of a static function
6116                          *
6117                          * for a subprog addr relocation, relo->sym_off + insn->imm is
6118                          * the byte offset within the corresponding section.
6119                          */
6120                         if (relo->type == RELO_CALL)
6121                                 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6122                         else
6123                                 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6124                 } else if (insn_is_pseudo_func(insn)) {
6125                         /*
6126                          * RELO_SUBPROG_ADDR relo is always emitted even if both
6127                          * functions are in the same section, so it shouldn't reach here.
6128                          */
6129                         pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6130                                 prog->name, insn_idx);
6131                         return -LIBBPF_ERRNO__RELOC;
6132                 } else {
6133                         /* if subprogram call is to a static function within
6134                          * the same ELF section, there won't be any relocation
6135                          * emitted, but it also means there is no additional
6136                          * offset necessary, insns->imm is relative to
6137                          * instruction's original position within the section
6138                          */
6139                         sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6140                 }
6141
6142                 /* we enforce that sub-programs should be in .text section */
6143                 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6144                 if (!subprog) {
6145                         pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6146                                 prog->name);
6147                         return -LIBBPF_ERRNO__RELOC;
6148                 }
6149
6150                 /* if it's the first call instruction calling into this
6151                  * subprogram (meaning this subprog hasn't been processed
6152                  * yet) within the context of current main program:
6153                  *   - append it at the end of the main program's instruction block;
6154                  *   - process it recursively, while the current program is put on hold;
6155                  *   - if that subprogram calls some other not-yet-processed
6156                  *   subprogram, the same thing happens recursively until
6157                  *   there are no more unprocessed subprograms left to append
6158                  *   and relocate.
6159                  */
6160                 if (subprog->sub_insn_off == 0) {
6161                         subprog->sub_insn_off = main_prog->insns_cnt;
6162
6163                         new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6164                         insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6165                         if (!insns) {
6166                                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6167                                 return -ENOMEM;
6168                         }
6169                         main_prog->insns = insns;
6170                         main_prog->insns_cnt = new_cnt;
6171
6172                         memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6173                                subprog->insns_cnt * sizeof(*insns));
6174
6175                         pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6176                                  main_prog->name, subprog->insns_cnt, subprog->name);
6177
6178                         /* The subprog insns are now appended. Append its relos too. */
6179                         err = append_subprog_relos(main_prog, subprog);
6180                         if (err)
6181                                 return err;
6182                         err = bpf_object__reloc_code(obj, main_prog, subprog);
6183                         if (err)
6184                                 return err;
6185                 }
6186
6187                 /* main_prog->insns memory could have been re-allocated, so
6188                  * calculate pointer again
6189                  */
6190                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6191                 /* calculate correct instruction position within current main
6192                  * prog; each main prog can have a different set of
6193                  * subprograms appended (potentially in different order as
6194                  * well), so position of any subprog can be different for
6195                  * different main programs */
6196                 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6197
6198                 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6199                          prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6200         }
6201
6202         return 0;
6203 }
6204
6205 /*
6206  * Relocate sub-program calls.
6207  *
6208  * The algorithm operates as follows. Each entry-point BPF program (referred
6209  * to as a main prog) is processed separately. Each subprog (a non-entry
6210  * function that can be called from either entry progs or other subprogs) gets
6211  * its sub_insn_off reset to zero. This serves as an indicator that this
6212  * subprogram hasn't yet been appended and relocated within the current main
6213  * prog. Once it is relocated, sub_insn_off will point at its position within
6214  * the current main prog, which is then used to relocate all
6215  * the call instructions jumping into this subprog.
6216  *
6217  * We start with main program and process all call instructions. If the call
6218  * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6219  * is zero), subprog instructions are appended at the end of main program's
6220  * instruction array. Then main program is "put on hold" while we recursively
6221  * process newly appended subprogram. If that subprogram calls into another
6222  * subprogram that hasn't been appended, new subprogram is appended again to
6223  * the *main* prog's instructions (subprog's instructions are always left
6224  * untouched, as they need to be in unmodified state for subsequent main progs
6225  * and subprog instructions are always sent only as part of a main prog) and
6226  * the process continues recursively. Once all the subprogs called from a main
6227  * prog or any of its subprogs are appended (and relocated), all their
6228  * positions within finalized instructions array are known, so it's easy to
6229  * rewrite call instructions with correct relative offsets, corresponding to
6230  * desired target subprog.
6231  *
6232  * It's important to realize that some subprogs might not be called from a given
6233  * main prog or any of its called/used subprogs. Those will keep their
6234  * subprog->sub_insn_off as zero at all times and won't be appended to current
6235  * main prog and won't be relocated within the context of current main prog.
6236  * They might still be used from other main progs later.
6237  *
6238  * Visually this process can be shown as below. Suppose we have two main
6239  * programs mainA and mainB and BPF object contains three subprogs: subA,
6240  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6241  * subC both call subB:
6242  *
6243  *        +--------+ +-------+
6244  *        |        v v       |
6245  *     +--+---+ +--+-+-+ +---+--+
6246  *     | subA | | subB | | subC |
6247  *     +--+---+ +------+ +---+--+
6248  *        ^                  ^
6249  *        |                  |
6250  *    +---+-------+   +------+----+
6251  *    |   mainA   |   |   mainB   |
6252  *    +-----------+   +-----------+
6253  *
6254  * We'll start relocating mainA, find subA, append it, and start
6255  * processing subA recursively:
6256  *
6257  *    +-----------+------+
6258  *    |   mainA   | subA |
6259  *    +-----------+------+
6260  *
6261  * At this point we notice that subB is used from subA, so we append it and
6262  * relocate (there are no further subcalls from subB):
6263  *
6264  *    +-----------+------+------+
6265  *    |   mainA   | subA | subB |
6266  *    +-----------+------+------+
6267  *
6268  * At this point, we relocate subA's calls, then go one level up and finish by
6269  * relocating mainA's calls. mainA is done.
6270  *
6271  * For mainB process is similar but results in different order. We start with
6272  * mainB and skip subA and subB, as mainB never calls them (at least
6273  * directly), but we see subC is needed, so we append and start processing it:
6274  *
6275  *    +-----------+------+
6276  *    |   mainB   | subC |
6277  *    +-----------+------+
6278  * Now we see subC needs subB, so we go back to it, append and relocate it:
6279  *
6280  *    +-----------+------+------+
6281  *    |   mainB   | subC | subB |
6282  *    +-----------+------+------+
6283  *
6284  * At this point we unwind recursion, relocate calls in subC, then in mainB.
6285  */
6286 static int
6287 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6288 {
6289         struct bpf_program *subprog;
6290         int i, err;
6291
6292         /* mark all subprogs as not relocated (yet) within the context of
6293          * current main program
6294          */
6295         for (i = 0; i < obj->nr_programs; i++) {
6296                 subprog = &obj->programs[i];
6297                 if (!prog_is_subprog(obj, subprog))
6298                         continue;
6299
6300                 subprog->sub_insn_off = 0;
6301         }
6302
6303         err = bpf_object__reloc_code(obj, prog, prog);
6304         if (err)
6305                 return err;
6306
6307         return 0;
6308 }
6309
6310 static void
6311 bpf_object__free_relocs(struct bpf_object *obj)
6312 {
6313         struct bpf_program *prog;
6314         int i;
6315
6316         /* free up relocation descriptors */
6317         for (i = 0; i < obj->nr_programs; i++) {
6318                 prog = &obj->programs[i];
6319                 zfree(&prog->reloc_desc);
6320                 prog->nr_reloc = 0;
6321         }
6322 }
6323
6324 static int cmp_relocs(const void *_a, const void *_b)
6325 {
6326         const struct reloc_desc *a = _a;
6327         const struct reloc_desc *b = _b;
6328
6329         if (a->insn_idx != b->insn_idx)
6330                 return a->insn_idx < b->insn_idx ? -1 : 1;
6331
6332         /* no two relocations should have the same insn_idx, but ... */
6333         if (a->type != b->type)
6334                 return a->type < b->type ? -1 : 1;
6335
6336         return 0;
6337 }
6338
6339 static void bpf_object__sort_relos(struct bpf_object *obj)
6340 {
6341         int i;
6342
6343         for (i = 0; i < obj->nr_programs; i++) {
6344                 struct bpf_program *p = &obj->programs[i];
6345
6346                 if (!p->nr_reloc)
6347                         continue;
6348
6349                 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6350         }
6351 }
6352
6353 static int
6354 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6355 {
6356         struct bpf_program *prog;
6357         size_t i, j;
6358         int err;
6359
6360         if (obj->btf_ext) {
6361                 err = bpf_object__relocate_core(obj, targ_btf_path);
6362                 if (err) {
6363                         pr_warn("failed to perform CO-RE relocations: %d\n",
6364                                 err);
6365                         return err;
6366                 }
6367                 bpf_object__sort_relos(obj);
6368         }
6369
6370         /* Before relocating calls, pre-process relocations and mark
6371          * the few ld_imm64 instructions that point to subprogs.
6372          * Otherwise bpf_object__reloc_code() later would have to consider
6373          * all ld_imm64 insns as relocation candidates. That would
6374          * slow relocation down, since the number of find_prog_insn_relo()
6375          * calls would increase and most of them would fail to find a relo.
6376          */
6377         for (i = 0; i < obj->nr_programs; i++) {
6378                 prog = &obj->programs[i];
6379                 for (j = 0; j < prog->nr_reloc; j++) {
6380                         struct reloc_desc *relo = &prog->reloc_desc[j];
6381                         struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6382
6383                         /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6384                         if (relo->type == RELO_SUBPROG_ADDR)
6385                                 insn[0].src_reg = BPF_PSEUDO_FUNC;
6386                 }
6387         }
6388
6389         /* relocate subprogram calls and append used subprograms to main
6390          * programs; each copy of subprogram code needs to be relocated
6391          * differently for each main program, because its code location might
6392          * have changed.
6393          * Append subprog relos to main programs to allow data relos to be
6394          * processed after text is completely relocated.
6395          */
6396         for (i = 0; i < obj->nr_programs; i++) {
6397                 prog = &obj->programs[i];
6398                 /* sub-program's sub-calls are relocated within the context of
6399                  * its main program only
6400                  */
6401                 if (prog_is_subprog(obj, prog))
6402                         continue;
6403                 if (!prog->autoload)
6404                         continue;
6405
6406                 err = bpf_object__relocate_calls(obj, prog);
6407                 if (err) {
6408                         pr_warn("prog '%s': failed to relocate calls: %d\n",
6409                                 prog->name, err);
6410                         return err;
6411                 }
6412         }
6413         /* Process data relos for main programs */
6414         for (i = 0; i < obj->nr_programs; i++) {
6415                 prog = &obj->programs[i];
6416                 if (prog_is_subprog(obj, prog))
6417                         continue;
6418                 if (!prog->autoload)
6419                         continue;
6420                 err = bpf_object__relocate_data(obj, prog);
6421                 if (err) {
6422                         pr_warn("prog '%s': failed to relocate data references: %d\n",
6423                                 prog->name, err);
6424                         return err;
6425                 }
6426         }
6427
6428         return 0;
6429 }
6430
6431 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6432                                             Elf64_Shdr *shdr, Elf_Data *data);
6433
6434 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6435                                          Elf64_Shdr *shdr, Elf_Data *data)
6436 {
6437         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6438         int i, j, nrels, new_sz;
6439         const struct btf_var_secinfo *vi = NULL;
6440         const struct btf_type *sec, *var, *def;
6441         struct bpf_map *map = NULL, *targ_map = NULL;
6442         struct bpf_program *targ_prog = NULL;
6443         bool is_prog_array, is_map_in_map;
6444         const struct btf_member *member;
6445         const char *name, *mname, *type;
6446         unsigned int moff;
6447         Elf64_Sym *sym;
6448         Elf64_Rel *rel;
6449         void *tmp;
6450
6451         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6452                 return -EINVAL;
6453         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6454         if (!sec)
6455                 return -EINVAL;
6456
6457         nrels = shdr->sh_size / shdr->sh_entsize;
6458         for (i = 0; i < nrels; i++) {
6459                 rel = elf_rel_by_idx(data, i);
6460                 if (!rel) {
6461                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6462                         return -LIBBPF_ERRNO__FORMAT;
6463                 }
6464
6465                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
6466                 if (!sym) {
6467                         pr_warn(".maps relo #%d: symbol %zx not found\n",
6468                                 i, (size_t)ELF64_R_SYM(rel->r_info));
6469                         return -LIBBPF_ERRNO__FORMAT;
6470                 }
6471                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
6472
6473                 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
6474                          i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
6475                          (size_t)rel->r_offset, sym->st_name, name);
6476
6477                 for (j = 0; j < obj->nr_maps; j++) {
6478                         map = &obj->maps[j];
6479                         if (map->sec_idx != obj->efile.btf_maps_shndx)
6480                                 continue;
6481
6482                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
6483                         if (vi->offset <= rel->r_offset &&
6484                             rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6485                                 break;
6486                 }
6487                 if (j == obj->nr_maps) {
6488                         pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
6489                                 i, name, (size_t)rel->r_offset);
6490                         return -EINVAL;
6491                 }
6492
6493                 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
6494                 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
6495                 type = is_map_in_map ? "map" : "prog";
6496                 if (is_map_in_map) {
6497                         if (sym->st_shndx != obj->efile.btf_maps_shndx) {
6498                                 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6499                                         i, name);
6500                                 return -LIBBPF_ERRNO__RELOC;
6501                         }
6502                         if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6503                             map->def.key_size != sizeof(int)) {
6504                                 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6505                                         i, map->name, sizeof(int));
6506                                 return -EINVAL;
6507                         }
6508                         targ_map = bpf_object__find_map_by_name(obj, name);
6509                         if (!targ_map) {
6510                                 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
6511                                         i, name);
6512                                 return -ESRCH;
6513                         }
6514                 } else if (is_prog_array) {
6515                         targ_prog = bpf_object__find_program_by_name(obj, name);
6516                         if (!targ_prog) {
6517                                 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
6518                                         i, name);
6519                                 return -ESRCH;
6520                         }
6521                         if (targ_prog->sec_idx != sym->st_shndx ||
6522                             targ_prog->sec_insn_off * 8 != sym->st_value ||
6523                             prog_is_subprog(obj, targ_prog)) {
6524                                 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
6525                                         i, name);
6526                                 return -LIBBPF_ERRNO__RELOC;
6527                         }
6528                 } else {
6529                         return -EINVAL;
6530                 }
6531
6532                 var = btf__type_by_id(obj->btf, vi->type);
6533                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6534                 if (btf_vlen(def) == 0)
6535                         return -EINVAL;
6536                 member = btf_members(def) + btf_vlen(def) - 1;
6537                 mname = btf__name_by_offset(obj->btf, member->name_off);
6538                 if (strcmp(mname, "values"))
6539                         return -EINVAL;
6540
6541                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6542                 if (rel->r_offset - vi->offset < moff)
6543                         return -EINVAL;
6544
6545                 moff = rel->r_offset - vi->offset - moff;
6546                 /* here we use the BPF pointer size, which is always 64-bit, as we
6547                  * are parsing an ELF that was built for the BPF target
6548                  */
6549                 if (moff % bpf_ptr_sz)
6550                         return -EINVAL;
6551                 moff /= bpf_ptr_sz;
6552                 if (moff >= map->init_slots_sz) {
6553                         new_sz = moff + 1;
6554                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6555                         if (!tmp)
6556                                 return -ENOMEM;
6557                         map->init_slots = tmp;
6558                         memset(map->init_slots + map->init_slots_sz, 0,
6559                                (new_sz - map->init_slots_sz) * host_ptr_sz);
6560                         map->init_slots_sz = new_sz;
6561                 }
6562                 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
6563
6564                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
6565                          i, map->name, moff, type, name);
6566         }
6567
6568         return 0;
6569 }
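
/* Editorial illustration of the BPF-side declarations that generate such
 * .maps relocations (assumed names, following the BTF-defined map
 * conventions): each initialized "values" slot is an ELF relocation that
 * ends up recorded in map->init_slots[] above.
 *
 *	struct inner {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, int);
 *	} inner_map SEC(".maps");
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 4);
 *		__array(values, struct inner);
 *	} outer_map SEC(".maps") = {
 *		.values = { [0] = &inner_map },
 *	};
 *
 * PROG_ARRAY maps are handled analogously, with slots referencing
 * entry-point programs instead of maps.
 */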
6570
6571 static int bpf_object__collect_relos(struct bpf_object *obj)
6572 {
6573         int i, err;
6574
6575         for (i = 0; i < obj->efile.sec_cnt; i++) {
6576                 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
6577                 Elf64_Shdr *shdr;
6578                 Elf_Data *data;
6579                 int idx;
6580
6581                 if (sec_desc->sec_type != SEC_RELO)
6582                         continue;
6583
6584                 shdr = sec_desc->shdr;
6585                 data = sec_desc->data;
6586                 idx = shdr->sh_info;
6587
6588                 if (shdr->sh_type != SHT_REL) {
6589                         pr_warn("internal error at %d\n", __LINE__);
6590                         return -LIBBPF_ERRNO__INTERNAL;
6591                 }
6592
6593                 if (idx == obj->efile.st_ops_shndx)
6594                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6595                 else if (idx == obj->efile.btf_maps_shndx)
6596                         err = bpf_object__collect_map_relos(obj, shdr, data);
6597                 else
6598                         err = bpf_object__collect_prog_relos(obj, shdr, data);
6599                 if (err)
6600                         return err;
6601         }
6602
6603         bpf_object__sort_relos(obj);
6604         return 0;
6605 }
6606
6607 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6608 {
6609         if (BPF_CLASS(insn->code) == BPF_JMP &&
6610             BPF_OP(insn->code) == BPF_CALL &&
6611             BPF_SRC(insn->code) == BPF_K &&
6612             insn->src_reg == 0 &&
6613             insn->dst_reg == 0) {
6614                     *func_id = insn->imm;
6615                     return true;
6616         }
6617         return false;
6618 }
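
/* Editorial reference: a helper call such as bpf_probe_read_kernel() is a
 * single instruction of the form
 *
 *	(struct bpf_insn) {
 *		.code = BPF_JMP | BPF_CALL,
 *		.imm  = BPF_FUNC_probe_read_kernel,	// helper ID
 *	};
 *
 * with src_reg and dst_reg both zero, which is exactly what the check above
 * matches. Kfunc calls use src_reg == BPF_PSEUDO_KFUNC_CALL and thus are not
 * treated as helper calls here.
 */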
6619
6620 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
6621 {
6622         struct bpf_insn *insn = prog->insns;
6623         enum bpf_func_id func_id;
6624         int i;
6625
6626         if (obj->gen_loader)
6627                 return 0;
6628
6629         for (i = 0; i < prog->insns_cnt; i++, insn++) {
6630                 if (!insn_is_helper_call(insn, &func_id))
6631                         continue;
6632
6633                 /* on kernels that don't yet support
6634                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6635                  * to bpf_probe_read(), which works well for old kernels
6636                  */
6637                 switch (func_id) {
6638                 case BPF_FUNC_probe_read_kernel:
6639                 case BPF_FUNC_probe_read_user:
6640                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6641                                 insn->imm = BPF_FUNC_probe_read;
6642                         break;
6643                 case BPF_FUNC_probe_read_kernel_str:
6644                 case BPF_FUNC_probe_read_user_str:
6645                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6646                                 insn->imm = BPF_FUNC_probe_read_str;
6647                         break;
6648                 default:
6649                         break;
6650                 }
6651         }
6652         return 0;
6653 }
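
/* Editorial example of the fallback above: on a kernel without the newer
 * probe-read helpers (roughly pre-5.5; the authoritative gate is the
 * FEAT_PROBE_READ_KERN feature probe), a program compiled with
 *
 *	bpf_probe_read_kernel(&dst, sizeof(dst), src);
 *
 * is silently rewritten so that insn->imm becomes BPF_FUNC_probe_read,
 * i.e. the legacy bpf_probe_read() helper, before the program is loaded.
 */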
6654
6655 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
6656                                      int *btf_obj_fd, int *btf_type_id);
6657
6658 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
6659 static int libbpf_prepare_prog_load(struct bpf_program *prog,
6660                                     struct bpf_prog_load_opts *opts, long cookie)
6661 {
6662         enum sec_def_flags def = cookie;
6663
6664         /* old kernels might not support specifying expected_attach_type */
6665         if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
6666                 opts->expected_attach_type = 0;
6667
6668         if (def & SEC_SLEEPABLE)
6669                 opts->prog_flags |= BPF_F_SLEEPABLE;
6670
6671         if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
6672                 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
6673
6674         if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
6675                 int btf_obj_fd = 0, btf_type_id = 0, err;
6676                 const char *attach_name;
6677
6678                 attach_name = strchr(prog->sec_name, '/');
6679                 if (!attach_name) {
6680                         /* if a BPF program is annotated with just SEC("fentry")
6681                          * (or similar) without declaratively specifying a
6682                          * target, then the target is expected to be set with
6683                          * bpf_program__set_attach_target() at runtime, before
6684                          * the BPF object load step. If it isn't, there is
6685                          * nothing to load into the kernel, as the BPF
6686                          * verifier won't be able to validate BPF program
6687                          * correctness anyway.
6688                          */
6689                         pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
6690                                 prog->name);
6691                         return -EINVAL;
6692                 }
6693                 attach_name++; /* skip over / */
6694
6695                 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
6696                 if (err)
6697                         return err;
6698
6699                 /* cache resolved BTF FD and BTF type ID in the prog */
6700                 prog->attach_btf_obj_fd = btf_obj_fd;
6701                 prog->attach_btf_id = btf_type_id;
6702
6703                 /* by now, though, libbpf's common logic no longer uses
6704                  * prog->attach_btf_obj_fd/prog->attach_btf_id, because this
6705                  * callback is invoked after opts were already populated by
6706                  * libbpf, so the callback has to update opts explicitly here
6707                  */
6708                 opts->attach_btf_obj_fd = btf_obj_fd;
6709                 opts->attach_btf_id = btf_type_id;
6710         }
6711         return 0;
6712 }
6713
6714 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
6715
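/* Load a single BPF program into the kernel: populate bpf_prog_load_opts from
 * prog state, let the sec_def's prepare callback adjust them, then call
 * bpf_prog_load(), retrying with a verifier log enabled (and with a growing
 * log buffer on ENOSPC) so that failures come with useful details.
 */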
6716 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
6717                                 struct bpf_insn *insns, int insns_cnt,
6718                                 const char *license, __u32 kern_version, int *prog_fd)
6719 {
6720         LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
6721         const char *prog_name = NULL;
6722         char *cp, errmsg[STRERR_BUFSIZE];
6723         size_t log_buf_size = 0;
6724         char *log_buf = NULL, *tmp;
6725         int btf_fd, ret, err;
6726         bool own_log_buf = true;
6727         __u32 log_level = prog->log_level;
6728
6729         if (prog->type == BPF_PROG_TYPE_UNSPEC) {
6730                 /*
6731                  * The program type must be set.  Most likely we couldn't find a proper
6732                  * section definition at open time, and thus we didn't infer the type.
6733                  */
6734                 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
6735                         prog->name, prog->sec_name);
6736                 return -EINVAL;
6737         }
6738
6739         if (!insns || !insns_cnt)
6740                 return -EINVAL;
6741
6742         load_attr.expected_attach_type = prog->expected_attach_type;
6743         if (kernel_supports(obj, FEAT_PROG_NAME))
6744                 prog_name = prog->name;
6745         load_attr.attach_prog_fd = prog->attach_prog_fd;
6746         load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
6747         load_attr.attach_btf_id = prog->attach_btf_id;
6748         load_attr.kern_version = kern_version;
6749         load_attr.prog_ifindex = prog->prog_ifindex;
6750
6751         /* specify func_info/line_info only if kernel supports them */
6752         btf_fd = bpf_object__btf_fd(obj);
6753         if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
6754                 load_attr.prog_btf_fd = btf_fd;
6755                 load_attr.func_info = prog->func_info;
6756                 load_attr.func_info_rec_size = prog->func_info_rec_size;
6757                 load_attr.func_info_cnt = prog->func_info_cnt;
6758                 load_attr.line_info = prog->line_info;
6759                 load_attr.line_info_rec_size = prog->line_info_rec_size;
6760                 load_attr.line_info_cnt = prog->line_info_cnt;
6761         }
6762         load_attr.log_level = log_level;
6763         load_attr.prog_flags = prog->prog_flags;
6764         load_attr.fd_array = obj->fd_array;
6765
6766         /* adjust load_attr if sec_def provides custom preload callback */
6767         if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
6768                 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
6769                 if (err < 0) {
6770                         pr_warn("prog '%s': failed to prepare load attributes: %d\n",
6771                                 prog->name, err);
6772                         return err;
6773                 }
6774                 insns = prog->insns;
6775                 insns_cnt = prog->insns_cnt;
6776         }
6777
6778         if (obj->gen_loader) {
6779                 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
6780                                    license, insns, insns_cnt, &load_attr,
6781                                    prog - obj->programs);
6782                 *prog_fd = -1;
6783                 return 0;
6784         }
6785
6786 retry_load:
6787         /* if log_level is zero, we don't request logs initially even if a
6788          * custom log_buf is specified; if the program load fails, we'll bump
6789          * log_level to 1 and use either the custom log_buf or one we allocate
6790          * ourselves, retrying the load to get details on what failed
6791          */
6792         if (log_level) {
6793                 if (prog->log_buf) {
6794                         log_buf = prog->log_buf;
6795                         log_buf_size = prog->log_size;
6796                         own_log_buf = false;
6797                 } else if (obj->log_buf) {
6798                         log_buf = obj->log_buf;
6799                         log_buf_size = obj->log_size;
6800                         own_log_buf = false;
6801                 } else {
6802                         log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
6803                         tmp = realloc(log_buf, log_buf_size);
6804                         if (!tmp) {
6805                                 ret = -ENOMEM;
6806                                 goto out;
6807                         }
6808                         log_buf = tmp;
6809                         log_buf[0] = '\0';
6810                         own_log_buf = true;
6811                 }
6812         }
6813
6814         load_attr.log_buf = log_buf;
6815         load_attr.log_size = log_buf_size;
6816         load_attr.log_level = log_level;
6817
6818         ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
6819         if (ret >= 0) {
6820                 if (log_level && own_log_buf) {
6821                         pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6822                                  prog->name, log_buf);
6823                 }
6824
6825                 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
6826                         struct bpf_map *map;
6827                         int i;
6828
6829                         for (i = 0; i < obj->nr_maps; i++) {
6830                                 map = &obj->maps[i];
6831                                 if (map->libbpf_type != LIBBPF_MAP_RODATA)
6832                                         continue;
6833
6834                                 if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) {
6835                                         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6836                                         pr_warn("prog '%s': failed to bind map '%s': %s\n",
6837                                                 prog->name, map->real_name, cp);
6838                                         /* Don't fail hard if we can't bind .rodata. */
6839                                 }
6840                         }
6841                 }
6842
6843                 *prog_fd = ret;
6844                 ret = 0;
6845                 goto out;
6846         }
6847
6848         if (log_level == 0) {
6849                 log_level = 1;
6850                 goto retry_load;
6851         }
6852         /* On ENOSPC, increase log buffer size and retry, unless custom
6853          * log_buf is specified.
6854          * Be careful to not overflow u32, though. Kernel's log buf size limit
6855          * isn't part of UAPI so it can always be bumped to full 4GB. So don't
6856          * multiply by 2 unless we are sure we'll fit within 32 bits.
6857          * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
6858          */
6859         if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
6860                 goto retry_load;
6861
6862         ret = -errno;
6863
6864         /* post-process verifier log to improve error descriptions */
6865         fixup_verifier_log(prog, log_buf, log_buf_size);
6866
6867         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6868         pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
6869         pr_perm_msg(ret);
6870
6871         if (own_log_buf && log_buf && log_buf[0] != '\0') {
6872                 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6873                         prog->name, log_buf);
6874         }
6875
6876 out:
6877         if (own_log_buf)
6878                 free(log_buf);
6879         return ret;
6880 }
6881
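/* Given cur pointing at the start of a line (or just past the last byte of
 * the log), return a pointer to the start of the preceding line, or NULL if
 * cur is already at the very beginning of the buffer.
 */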
6882 static char *find_prev_line(char *buf, char *cur)
6883 {
6884         char *p;
6885
6886         if (cur == buf) /* reached the start of the log buf */
6887                 return NULL;
6888
6889         p = cur - 1;
6890         while (p - 1 >= buf && *(p - 1) != '\n')
6891                 p--;
6892
6893         return p;
6894 }
6895
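/* Replace the [orig, orig + orig_sz) region of a log that currently holds
 * log_sz bytes with the patch string, shifting the rest of the log left or
 * right as needed and truncating its tail if buf_sz would be exceeded.
 */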
6896 static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
6897                       char *orig, size_t orig_sz, const char *patch)
6898 {
6899         /* size of the remaining log content to the right of the to-be-replaced part */
6900         size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
6901         size_t patch_sz = strlen(patch);
6902
6903         if (patch_sz != orig_sz) {
6904                 /* If the patch line(s) are longer than the original piece of the
6905                  * verifier log, shift log contents by (patch_sz - orig_sz) bytes
6906                  * to the right, starting from after the to-be-replaced part.
6907                  *
6908                  * If the patch line(s) are shorter than the original piece of the
6909                  * verifier log, shift log contents by (orig_sz - patch_sz) bytes
6910                  * to the left, starting from after the to-be-replaced part.
6911                  *
6912                  * We need to be careful not to overflow the available buf_sz
6913                  * capacity; if we would, we truncate the end of the original
6914                  * log as necessary.
6915                  */
6916                 if (patch_sz > orig_sz) {
6917                         if (orig + patch_sz >= buf + buf_sz) {
6918                                 /* patch is big enough to cover remaining space completely */
6919                                 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
6920                                 rem_sz = 0;
6921                         } else if (patch_sz - orig_sz > buf_sz - log_sz) {
6922                                 /* patch causes part of remaining log to be truncated */
6923                                 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
6924                         }
6925                 }
6926                 /* shift remaining log to the right by calculated amount */
6927                 memmove(orig + patch_sz, orig + orig_sz, rem_sz);
6928         }
6929
6930         memcpy(orig, patch, patch_sz);
6931 }
6932
6933 static void fixup_log_failed_core_relo(struct bpf_program *prog,
6934                                        char *buf, size_t buf_sz, size_t log_sz,
6935                                        char *line1, char *line2, char *line3)
6936 {
6937         /* Expected log for failed and not properly guarded CO-RE relocation:
6938          * line1 -> 123: (85) call unknown#195896080
6939          * line2 -> invalid func unknown#195896080
6940          * line3 -> <anything else or end of buffer>
6941          *
6942          * "123" is the index of the instruction that was poisoned. We extract
6943          * "123" is the index of the instruction that was poisoned. We extract
6944          * the instruction index to find the corresponding CO-RE relocation and
6945          * replace this part of the log with more relevant information about
6946          * the failed CO-RE relocation.
6947         const struct bpf_core_relo *relo;
6948         struct bpf_core_spec spec;
6949         char patch[512], spec_buf[256];
6950         int insn_idx, err, spec_len;
6951
6952         if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
6953                 return;
6954
6955         relo = find_relo_core(prog, insn_idx);
6956         if (!relo)
6957                 return;
6958
6959         err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
6960         if (err)
6961                 return;
6962
6963         spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
6964         snprintf(patch, sizeof(patch),
6965                  "%d: <invalid CO-RE relocation>\n"
6966                  "failed to resolve CO-RE relocation %s%s\n",
6967                  insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
6968
6969         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
6970 }
6971
6972 static void fixup_log_missing_map_load(struct bpf_program *prog,
6973                                        char *buf, size_t buf_sz, size_t log_sz,
6974                                        char *line1, char *line2, char *line3)
6975 {
6976         /* Expected log for a reference to a BPF map that wasn't created:
6977          * line1 -> 123: (85) call unknown#2001000345
6978          * line2 -> invalid func unknown#2001000345
6979          * line3 -> <anything else or end of buffer>
6980          *
6981          * "123" is the index of the instruction that was poisoned.
6982          * "345" in "2001000345" is the map index in obj->maps, used to fetch the map name.
6983          */
6984         struct bpf_object *obj = prog->obj;
6985         const struct bpf_map *map;
6986         int insn_idx, map_idx;
6987         char patch[128];
6988
6989         if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
6990                 return;
6991
6992         map_idx -= MAP_LDIMM64_POISON_BASE;
6993         if (map_idx < 0 || map_idx >= obj->nr_maps)
6994                 return;
6995         map = &obj->maps[map_idx];
6996
6997         snprintf(patch, sizeof(patch),
6998                  "%d: <invalid BPF map reference>\n"
6999                  "BPF map '%s' is referenced but wasn't created\n",
7000                  insn_idx, map->name);
7001
7002         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7003 }
7004
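/* Scan the tail of the verifier log for known poisoned-instruction patterns
 * and rewrite them in place into more useful, human-readable explanations.
 */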
7005 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
7006 {
7007         /* look for familiar error patterns in the last N lines of the log */
7008         const size_t max_last_line_cnt = 10;
7009         char *prev_line, *cur_line, *next_line;
7010         size_t log_sz;
7011         int i;
7012
7013         if (!buf)
7014                 return;
7015
7016         log_sz = strlen(buf) + 1;
7017         next_line = buf + log_sz - 1;
7018
7019         for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
7020                 cur_line = find_prev_line(buf, next_line);
7021                 if (!cur_line)
7022                         return;
7023
7024                 /* failed CO-RE relocation case */
7025                 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
7026                         prev_line = find_prev_line(buf, cur_line);
7027                         if (!prev_line)
7028                                 continue;
7029
7030                         fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
7031                                                    prev_line, cur_line, next_line);
7032                         return;
7033                 } else if (str_has_pfx(cur_line, "invalid func unknown#"MAP_LDIMM64_POISON_PFX)) {
7034                         prev_line = find_prev_line(buf, cur_line);
7035                         if (!prev_line)
7036                                 continue;
7037
7038                         fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
7039                                                    prev_line, cur_line, next_line);
7040                         return;
7041                 }
7042         }
7043 }
7044
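/* In gen_loader (light skeleton) mode, record extern ksym and CO-RE
 * relocations so that the generated loader program can resolve them at load
 * time.
 */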
7045 static int bpf_program_record_relos(struct bpf_program *prog)
7046 {
7047         struct bpf_object *obj = prog->obj;
7048         int i;
7049
7050         for (i = 0; i < prog->nr_reloc; i++) {
7051                 struct reloc_desc *relo = &prog->reloc_desc[i];
7052                 struct extern_desc *ext = &obj->externs[relo->sym_off];
7053
7054                 switch (relo->type) {
7055                 case RELO_EXTERN_VAR:
7056                         if (ext->type != EXT_KSYM)
7057                                 continue;
7058                         bpf_gen__record_extern(obj->gen_loader, ext->name,
7059                                                ext->is_weak, !ext->ksym.type_id,
7060                                                BTF_KIND_VAR, relo->insn_idx);
7061                         break;
7062                 case RELO_EXTERN_FUNC:
7063                         bpf_gen__record_extern(obj->gen_loader, ext->name,
7064                                                ext->is_weak, false, BTF_KIND_FUNC,
7065                                                relo->insn_idx);
7066                         break;
7067                 case RELO_CORE: {
7068                         struct bpf_core_relo cr = {
7069                                 .insn_off = relo->insn_idx * 8,
7070                                 .type_id = relo->core_relo->type_id,
7071                                 .access_str_off = relo->core_relo->access_str_off,
7072                                 .kind = relo->core_relo->kind,
7073                         };
7074
7075                         bpf_gen__record_relo_core(obj->gen_loader, &cr);
7076                         break;
7077                 }
7078                 default:
7079                         continue;
7080                 }
7081         }
7082         return 0;
7083 }
7084
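/* Sanitize all programs, then load every non-subprog program that has
 * autoload enabled; relocation data is freed once all loads succeed.
 */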
7085 static int
7086 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7087 {
7088         struct bpf_program *prog;
7089         size_t i;
7090         int err;
7091
7092         for (i = 0; i < obj->nr_programs; i++) {
7093                 prog = &obj->programs[i];
7094                 err = bpf_object__sanitize_prog(obj, prog);
7095                 if (err)
7096                         return err;
7097         }
7098
7099         for (i = 0; i < obj->nr_programs; i++) {
7100                 prog = &obj->programs[i];
7101                 if (prog_is_subprog(obj, prog))
7102                         continue;
7103                 if (!prog->autoload) {
7104                         pr_debug("prog '%s': skipped loading\n", prog->name);
7105                         continue;
7106                 }
7107                 prog->log_level |= log_level;
7108
7109                 if (obj->gen_loader)
7110                         bpf_program_record_relos(prog);
7111
7112                 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7113                                            obj->license, obj->kern_version, &prog->fd);
7114                 if (err) {
7115                         pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7116                         return err;
7117                 }
7118         }
7119
7120         bpf_object__free_relocs(obj);
7121         return 0;
7122 }
7123
7124 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7125
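/* Match each program's ELF section name against the known section definitions
 * to infer program type and expected attach type, and run any
 * sec_def-specific setup callback.
 */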
7126 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7127 {
7128         struct bpf_program *prog;
7129         int err;
7130
7131         bpf_object__for_each_program(prog, obj) {
7132                 prog->sec_def = find_sec_def(prog->sec_name);
7133                 if (!prog->sec_def) {
7134                         /* couldn't guess, but the user might still specify it manually */
7135                         pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7136                                  prog->name, prog->sec_name);
7137                         continue;
7138                 }
7139
7140                 prog->type = prog->sec_def->prog_type;
7141                 prog->expected_attach_type = prog->sec_def->expected_attach_type;
7142
7143                 /* sec_def can have custom callback which should be called
7144                  * after bpf_program is initialized to adjust its properties
7145                  */
7146                 if (prog->sec_def->prog_setup_fn) {
7147                         err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7148                         if (err < 0) {
7149                                 pr_warn("prog '%s': failed to initialize: %d\n",
7150                                         prog->name, err);
7151                                 return err;
7152                         }
7153                 }
7154         }
7155
7156         return 0;
7157 }
7158
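/* Common open path behind bpf_object__open_file() and bpf_object__open_mem():
 * allocate the object, apply open options, then parse the ELF, externs, BTF,
 * maps, programs and relocations.
 */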
7159 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7160                                           const struct bpf_object_open_opts *opts)
7161 {
7162         const char *obj_name, *kconfig, *btf_tmp_path;
7163         struct bpf_object *obj;
7164         char tmp_name[64];
7165         int err;
7166         char *log_buf;
7167         size_t log_size;
7168         __u32 log_level;
7169
7170         if (elf_version(EV_CURRENT) == EV_NONE) {
7171                 pr_warn("failed to init libelf for %s\n",
7172                         path ? : "(mem buf)");
7173                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7174         }
7175
7176         if (!OPTS_VALID(opts, bpf_object_open_opts))
7177                 return ERR_PTR(-EINVAL);
7178
7179         obj_name = OPTS_GET(opts, object_name, NULL);
7180         if (obj_buf) {
7181                 if (!obj_name) {
7182                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7183                                  (unsigned long)obj_buf,
7184                                  (unsigned long)obj_buf_sz);
7185                         obj_name = tmp_name;
7186                 }
7187                 path = obj_name;
7188                 pr_debug("loading object '%s' from buffer\n", obj_name);
7189         }
7190
7191         log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7192         log_size = OPTS_GET(opts, kernel_log_size, 0);
7193         log_level = OPTS_GET(opts, kernel_log_level, 0);
7194         if (log_size > UINT_MAX)
7195                 return ERR_PTR(-EINVAL);
7196         if (log_size && !log_buf)
7197                 return ERR_PTR(-EINVAL);
7198
7199         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7200         if (IS_ERR(obj))
7201                 return obj;
7202
7203         obj->log_buf = log_buf;
7204         obj->log_size = log_size;
7205         obj->log_level = log_level;
7206
7207         btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7208         if (btf_tmp_path) {
7209                 if (strlen(btf_tmp_path) >= PATH_MAX) {
7210                         err = -ENAMETOOLONG;
7211                         goto out;
7212                 }
7213                 obj->btf_custom_path = strdup(btf_tmp_path);
7214                 if (!obj->btf_custom_path) {
7215                         err = -ENOMEM;
7216                         goto out;
7217                 }
7218         }
7219
7220         kconfig = OPTS_GET(opts, kconfig, NULL);
7221         if (kconfig) {
7222                 obj->kconfig = strdup(kconfig);
7223                 if (!obj->kconfig) {
7224                         err = -ENOMEM;
7225                         goto out;
7226                 }
7227         }
7228
7229         err = bpf_object__elf_init(obj);
7230         err = err ? : bpf_object__check_endianness(obj);
7231         err = err ? : bpf_object__elf_collect(obj);
7232         err = err ? : bpf_object__collect_externs(obj);
7233         err = err ? : bpf_object__finalize_btf(obj);
7234         err = err ? : bpf_object__init_maps(obj, opts);
7235         err = err ? : bpf_object_init_progs(obj, opts);
7236         err = err ? : bpf_object__collect_relos(obj);
7237         if (err)
7238                 goto out;
7239
7240         bpf_object__elf_finish(obj);
7241
7242         return obj;
7243 out:
7244         bpf_object__close(obj);
7245         return ERR_PTR(err);
7246 }
7247
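/* A minimal usage sketch (the object file name and error handling here are
 * illustrative only):
 *
 *	struct bpf_object *obj;
 *
 *	obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	if (!obj)
 *		return -errno;
 *	if (bpf_object__load(obj)) {
 *		bpf_object__close(obj);
 *		return -errno;
 *	}
 */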
7248 struct bpf_object *
7249 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7250 {
7251         if (!path)
7252                 return libbpf_err_ptr(-EINVAL);
7253
7254         pr_debug("loading %s\n", path);
7255
7256         return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
7257 }
7258
7259 struct bpf_object *bpf_object__open(const char *path)
7260 {
7261         return bpf_object__open_file(path, NULL);
7262 }
7263
7264 struct bpf_object *
7265 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7266                      const struct bpf_object_open_opts *opts)
7267 {
7268         if (!obj_buf || obj_buf_sz == 0)
7269                 return libbpf_err_ptr(-EINVAL);
7270
7271         return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
7272 }
7273
7274 static int bpf_object_unload(struct bpf_object *obj)
7275 {
7276         size_t i;
7277
7278         if (!obj)
7279                 return libbpf_err(-EINVAL);
7280
7281         for (i = 0; i < obj->nr_maps; i++) {
7282                 zclose(obj->maps[i].fd);
7283                 if (obj->maps[i].st_ops)
7284                         zfree(&obj->maps[i].st_ops->kern_vdata);
7285         }
7286
7287         for (i = 0; i < obj->nr_programs; i++)
7288                 bpf_program__unload(&obj->programs[i]);
7289
7290         return 0;
7291 }
7292
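/* libbpf creates internal (.data/.rodata/.bss/.kconfig) maps with
 * BPF_F_MMAPABLE set, so when the kernel doesn't support mmap()-able array
 * maps, the XOR below clears that flag again.
 */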
7293 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7294 {
7295         struct bpf_map *m;
7296
7297         bpf_object__for_each_map(m, obj) {
7298                 if (!bpf_map__is_internal(m))
7299                         continue;
7300                 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7301                         m->def.map_flags ^= BPF_F_MMAPABLE;
7302         }
7303
7304         return 0;
7305 }
7306
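/* Parse /proc/kallsyms, invoking cb with (address, type, name, ctx) for each
 * symbol; iteration stops early on the first non-zero return from cb.
 *
 * A hypothetical callback, for illustration only:
 *
 *	static int count_text_syms(unsigned long long addr, char type,
 *				   const char *name, void *ctx)
 *	{
 *		if (type == 't' || type == 'T')
 *			(*(size_t *)ctx)++;
 *		return 0;
 *	}
 */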
7307 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
7308 {
7309         char sym_type, sym_name[500];
7310         unsigned long long sym_addr;
7311         int ret, err = 0;
7312         FILE *f;
7313
7314         f = fopen("/proc/kallsyms", "r");
7315         if (!f) {
7316                 err = -errno;
7317                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7318                 return err;
7319         }
7320
7321         while (true) {
7322                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7323                              &sym_addr, &sym_type, sym_name);
7324                 if (ret == EOF && feof(f))
7325                         break;
7326                 if (ret != 3) {
7327                         pr_warn("failed to read kallsyms entry: %d\n", ret);
7328                         err = -EINVAL;
7329                         break;
7330                 }
7331
7332                 err = cb(sym_addr, sym_type, sym_name, ctx);
7333                 if (err)
7334                         break;
7335         }
7336
7337         fclose(f);
7338         return err;
7339 }
7340
7341 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
7342                        const char *sym_name, void *ctx)
7343 {
7344         struct bpf_object *obj = ctx;
7345         const struct btf_type *t;
7346         struct extern_desc *ext;
7347
7348         ext = find_extern_by_name(obj, sym_name);
7349         if (!ext || ext->type != EXT_KSYM)
7350                 return 0;
7351
7352         t = btf__type_by_id(obj->btf, ext->btf_id);
7353         if (!btf_is_var(t))
7354                 return 0;
7355
7356         if (ext->is_set && ext->ksym.addr != sym_addr) {
7357                 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
7358                         sym_name, ext->ksym.addr, sym_addr);
7359                 return -EINVAL;
7360         }
7361         if (!ext->is_set) {
7362                 ext->is_set = true;
7363                 ext->ksym.addr = sym_addr;
7364                 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
7365         }
7366         return 0;
7367 }
7368
7369 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7370 {
7371         return libbpf_kallsyms_parse(kallsyms_cb, obj);
7372 }
7373
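/* Look up ksym_name with the given BTF kind in vmlinux BTF first, falling
 * back to kernel module BTFs. On success, return the type ID and report which
 * BTF it was found in via res_btf/res_mod_btf (res_mod_btf stays NULL for
 * vmlinux).
 */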
7374 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7375                             __u16 kind, struct btf **res_btf,
7376                             struct module_btf **res_mod_btf)
7377 {
7378         struct module_btf *mod_btf;
7379         struct btf *btf;
7380         int i, id, err;
7381
7382         btf = obj->btf_vmlinux;
7383         mod_btf = NULL;
7384         id = btf__find_by_name_kind(btf, ksym_name, kind);
7385
7386         if (id == -ENOENT) {
7387                 err = load_module_btfs(obj);
7388                 if (err)
7389                         return err;
7390
7391                 for (i = 0; i < obj->btf_module_cnt; i++) {
7392                         /* we assume module_btf's BTF FD is always >0 */
7393                         mod_btf = &obj->btf_modules[i];
7394                         btf = mod_btf->btf;
7395                         id = btf__find_by_name_kind_own(btf, ksym_name, kind);
7396                         if (id != -ENOENT)
7397                                 break;
7398                 }
7399         }
7400         if (id <= 0)
7401                 return -ESRCH;
7402
7403         *res_btf = btf;
7404         *res_mod_btf = mod_btf;
7405         return id;
7406 }
7407
7408 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7409                                                struct extern_desc *ext)
7410 {
7411         const struct btf_type *targ_var, *targ_type;
7412         __u32 targ_type_id, local_type_id;
7413         struct module_btf *mod_btf = NULL;
7414         const char *targ_var_name;
7415         struct btf *btf = NULL;
7416         int id, err;
7417
7418         id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
7419         if (id < 0) {
7420                 if (id == -ESRCH && ext->is_weak)
7421                         return 0;
7422                 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
7423                         ext->name);
7424                 return id;
7425         }
7426
7427         /* find local type_id */
7428         local_type_id = ext->ksym.type_id;
7429
7430         /* find target type_id */
7431         targ_var = btf__type_by_id(btf, id);
7432         targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7433         targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7434
7435         err = bpf_core_types_are_compat(obj->btf, local_type_id,
7436                                         btf, targ_type_id);
7437         if (err <= 0) {
7438                 const struct btf_type *local_type;
7439                 const char *targ_name, *local_name;
7440
7441                 local_type = btf__type_by_id(obj->btf, local_type_id);
7442                 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7443                 targ_name = btf__name_by_offset(btf, targ_type->name_off);
7444
7445                 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7446                         ext->name, local_type_id,
7447                         btf_kind_str(local_type), local_name, targ_type_id,
7448                         btf_kind_str(targ_type), targ_name);
7449                 return -EINVAL;
7450         }
7451
7452         ext->is_set = true;
7453         ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7454         ext->ksym.kernel_btf_id = id;
7455         pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7456                  ext->name, id, btf_kind_str(targ_var), targ_var_name);
7457
7458         return 0;
7459 }
7460
7461 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7462                                                 struct extern_desc *ext)
7463 {
7464         int local_func_proto_id, kfunc_proto_id, kfunc_id;
7465         struct module_btf *mod_btf = NULL;
7466         const struct btf_type *kern_func;
7467         struct btf *kern_btf = NULL;
7468         int ret;
7469
7470         local_func_proto_id = ext->ksym.type_id;
7471
7472         kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf);
7473         if (kfunc_id < 0) {
7474                 if (kfunc_id == -ESRCH && ext->is_weak)
7475                         return 0;
7476                 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
7477                         ext->name);
7478                 return kfunc_id;
7479         }
7480
7481         kern_func = btf__type_by_id(kern_btf, kfunc_id);
7482         kfunc_proto_id = kern_func->type;
7483
7484         ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
7485                                         kern_btf, kfunc_proto_id);
7486         if (ret <= 0) {
7487                 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
7488                         ext->name, local_func_proto_id, kfunc_proto_id);
7489                 return -EINVAL;
7490         }
7491
7492         /* set index for module BTF fd in fd_array, if unset */
7493         if (mod_btf && !mod_btf->fd_array_idx) {
7494                 /* insn->off is s16 */
7495                 if (obj->fd_array_cnt == INT16_MAX) {
7496                         pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
7497                                 ext->name, mod_btf->fd_array_idx);
7498                         return -E2BIG;
7499                 }
7500                 /* Cannot use index 0 for module BTF fd */
7501                 if (!obj->fd_array_cnt)
7502                         obj->fd_array_cnt = 1;
7503
7504                 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
7505                                         obj->fd_array_cnt + 1);
7506                 if (ret)
7507                         return ret;
7508                 mod_btf->fd_array_idx = obj->fd_array_cnt;
7509                 /* we assume module BTF FD is always >0 */
7510                 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
7511         }
7512
7513         ext->is_set = true;
7514         ext->ksym.kernel_btf_id = kfunc_id;
7515         ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
7516         pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
7517                  ext->name, kfunc_id);
7518
7519         return 0;
7520 }
7521
7522 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7523 {
7524         const struct btf_type *t;
7525         struct extern_desc *ext;
7526         int i, err;
7527
7528         for (i = 0; i < obj->nr_extern; i++) {
7529                 ext = &obj->externs[i];
7530                 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7531                         continue;
7532
7533                 if (obj->gen_loader) {
7534                         ext->is_set = true;
7535                         ext->ksym.kernel_btf_obj_fd = 0;
7536                         ext->ksym.kernel_btf_id = 0;
7537                         continue;
7538                 }
7539                 t = btf__type_by_id(obj->btf, ext->btf_id);
7540                 if (btf_is_var(t))
7541                         err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
7542                 else
7543                         err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
7544                 if (err)
7545                         return err;
7546         }
7547         return 0;
7548 }
7549
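/* Resolve all extern symbols used by the object: CONFIG_* values from
 * Kconfig, libbpf-provided virtual LINUX_* constants, and ksyms (via
 * /proc/kallsyms for untyped ones, kernel/module BTF for typed ones).
 * A strong extern left unresolved fails the load; weak ones default to zero.
 */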
7550 static int bpf_object__resolve_externs(struct bpf_object *obj,
7551                                        const char *extra_kconfig)
7552 {
7553         bool need_config = false, need_kallsyms = false;
7554         bool need_vmlinux_btf = false;
7555         struct extern_desc *ext;
7556         void *kcfg_data = NULL;
7557         int err, i;
7558
7559         if (obj->nr_extern == 0)
7560                 return 0;
7561
7562         if (obj->kconfig_map_idx >= 0)
7563                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
7564
7565         for (i = 0; i < obj->nr_extern; i++) {
7566                 ext = &obj->externs[i];
7567
7568                 if (ext->type == EXT_KSYM) {
7569                         if (ext->ksym.type_id)
7570                                 need_vmlinux_btf = true;
7571                         else
7572                                 need_kallsyms = true;
7573                         continue;
7574                 } else if (ext->type == EXT_KCFG) {
7575                         void *ext_ptr = kcfg_data + ext->kcfg.data_off;
7576                         __u64 value = 0;
7577
7578                         /* Kconfig externs need actual /proc/config.gz */
7579                         if (str_has_pfx(ext->name, "CONFIG_")) {
7580                                 need_config = true;
7581                                 continue;
7582                         }
7583
7584                         /* Virtual kcfg externs are handled specially by libbpf */
7585                         if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
7586                                 value = get_kernel_version();
7587                                 if (!value) {
7588                                         pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
7589                                         return -EINVAL;
7590                                 }
7591                         } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
7592                                 value = kernel_supports(obj, FEAT_BPF_COOKIE);
7593                         } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
7594                                 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
7595                         } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
7596                                 /* Currently libbpf supports only CONFIG_- and LINUX_-prefixed
7597                                  * __kconfig externs, where the LINUX_ ones are virtual and
7598                                  * filled out specially by libbpf (their values don't come from
7599                                  * Kconfig). If a LINUX_xxx variable is not recognized by libbpf
7600                                  * but is marked __weak, it defaults to a zero value, just like
7601                                  * CONFIG_xxx externs.
7602                                  */
7603                                 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
7604                                 return -EINVAL;
7605                         }
7606
7607                         err = set_kcfg_value_num(ext, ext_ptr, value);
7608                         if (err)
7609                                 return err;
7610                         pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
7611                                  ext->name, (long long)value);
7612                 } else {
7613                         pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
7614                         return -EINVAL;
7615                 }
7616         }
7617         if (need_config && extra_kconfig) {
7618                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
7619                 if (err)
7620                         return -EINVAL;
7621                 need_config = false;
7622                 for (i = 0; i < obj->nr_extern; i++) {
7623                         ext = &obj->externs[i];
7624                         if (ext->type == EXT_KCFG && !ext->is_set) {
7625                                 need_config = true;
7626                                 break;
7627                         }
7628                 }
7629         }
7630         if (need_config) {
7631                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
7632                 if (err)
7633                         return -EINVAL;
7634         }
7635         if (need_kallsyms) {
7636                 err = bpf_object__read_kallsyms_file(obj);
7637                 if (err)
7638                         return -EINVAL;
7639         }
7640         if (need_vmlinux_btf) {
7641                 err = bpf_object__resolve_ksyms_btf_id(obj);
7642                 if (err)
7643                         return -EINVAL;
7644         }
7645         for (i = 0; i < obj->nr_extern; i++) {
7646                 ext = &obj->externs[i];
7647
7648                 if (!ext->is_set && !ext->is_weak) {
7649                         pr_warn("extern '%s' (strong): not resolved\n", ext->name);
7650                         return -ESRCH;
7651                 } else if (!ext->is_set) {
7652                         pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
7653                                  ext->name);
7654                 }
7655         }
7656
7657         return 0;
7658 }
7659
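/* Load an opened object into the kernel: probe kernel features, resolve
 * externs, sanitize and load BTF, create maps, relocate and load programs.
 * On failure, any map auto-pinned during load is unpinned and the object is
 * unloaded; a second load attempt on the same object is rejected either way.
 */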
7660 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
7661 {
7662         int err, i;
7663
7664         if (!obj)
7665                 return libbpf_err(-EINVAL);
7666
7667         if (obj->loaded) {
7668                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
7669                 return libbpf_err(-EINVAL);
7670         }
7671
7672         if (obj->gen_loader)
7673                 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
7674
7675         err = bpf_object__probe_loading(obj);
7676         err = err ? : bpf_object__load_vmlinux_btf(obj, false);
7677         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
7678         err = err ? : bpf_object__sanitize_and_load_btf(obj);
7679         err = err ? : bpf_object__sanitize_maps(obj);
7680         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
7681         err = err ? : bpf_object__create_maps(obj);
7682         err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
7683         err = err ? : bpf_object__load_progs(obj, extra_log_level);
7684         err = err ? : bpf_object_init_prog_arrays(obj);
7685
7686         if (obj->gen_loader) {
7687                 /* reset FDs */
7688                 if (obj->btf)
7689                         btf__set_fd(obj->btf, -1);
7690                 for (i = 0; i < obj->nr_maps; i++)
7691                         obj->maps[i].fd = -1;
7692                 if (!err)
7693                         err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
7694         }
7695
7696         /* clean up fd_array */
7697         zfree(&obj->fd_array);
7698
7699         /* clean up module BTFs */
7700         for (i = 0; i < obj->btf_module_cnt; i++) {
7701                 close(obj->btf_modules[i].fd);
7702                 btf__free(obj->btf_modules[i].btf);
7703                 free(obj->btf_modules[i].name);
7704         }
7705         free(obj->btf_modules);
7706
7707         /* clean up vmlinux BTF */
7708         btf__free(obj->btf_vmlinux);
7709         obj->btf_vmlinux = NULL;
7710
7711         obj->loaded = true; /* whether the load succeeded or not */
7712
7713         if (err)
7714                 goto out;
7715
7716         return 0;
7717 out:
7718         /* unpin any maps that were auto-pinned during load */
7719         for (i = 0; i < obj->nr_maps; i++)
7720                 if (obj->maps[i].pinned && !obj->maps[i].reused)
7721                         bpf_map__unpin(&obj->maps[i], NULL);
7722
7723         bpf_object_unload(obj);
7724         pr_warn("failed to load object '%s'\n", obj->path);
7725         return libbpf_err(err);
7726 }
7727
7728 int bpf_object__load(struct bpf_object *obj)
7729 {
7730         return bpf_object_load(obj, 0, NULL);
7731 }
7732
7733 static int make_parent_dir(const char *path)
7734 {
7735         char *cp, errmsg[STRERR_BUFSIZE];
7736         char *dname, *dir;
7737         int err = 0;
7738
7739         dname = strdup(path);
7740         if (dname == NULL)
7741                 return -ENOMEM;
7742
7743         dir = dirname(dname);
7744         if (mkdir(dir, 0700) && errno != EEXIST)
7745                 err = -errno;
7746
7747         free(dname);
7748         if (err) {
7749                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7750                 pr_warn("failed to mkdir %s: %s\n", path, cp);
7751         }
7752         return err;
7753 }
7754
7755 static int check_path(const char *path)
7756 {
7757         char *cp, errmsg[STRERR_BUFSIZE];
7758         struct statfs st_fs;
7759         char *dname, *dir;
7760         int err = 0;
7761
7762         if (path == NULL)
7763                 return -EINVAL;
7764
7765         dname = strdup(path);
7766         if (dname == NULL)
7767                 return -ENOMEM;
7768
7769         dir = dirname(dname);
7770         if (statfs(dir, &st_fs)) {
7771                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7772                 pr_warn("failed to statfs %s: %s\n", dir, cp);
7773                 err = -errno;
7774         }
7775         free(dname);
7776
7777         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
7778                 pr_warn("specified path %s is not on BPF FS\n", path);
7779                 err = -EINVAL;
7780         }
7781
7782         return err;
7783 }
7784
7785 int bpf_program__pin(struct bpf_program *prog, const char *path)
7786 {
7787         char *cp, errmsg[STRERR_BUFSIZE];
7788         int err;
7789
7790         if (prog->fd < 0) {
7791                 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
7792                 return libbpf_err(-EINVAL);
7793         }
7794
7795         err = make_parent_dir(path);
7796         if (err)
7797                 return libbpf_err(err);
7798
7799         err = check_path(path);
7800         if (err)
7801                 return libbpf_err(err);
7802
7803         if (bpf_obj_pin(prog->fd, path)) {
7804                 err = -errno;
7805                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
7806                 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
7807                 return libbpf_err(err);
7808         }
7809
7810         pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
7811         return 0;
7812 }
7813
7814 int bpf_program__unpin(struct bpf_program *prog, const char *path)
7815 {
7816         int err;
7817
7818         if (prog->fd < 0) {
7819                 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
7820                 return libbpf_err(-EINVAL);
7821         }
7822
7823         err = check_path(path);
7824         if (err)
7825                 return libbpf_err(err);
7826
7827         err = unlink(path);
7828         if (err)
7829                 return libbpf_err(-errno);
7830
7831         pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
7832         return 0;
7833 }
7834
7835 int bpf_map__pin(struct bpf_map *map, const char *path)
7836 {
7837         char *cp, errmsg[STRERR_BUFSIZE];
7838         int err;
7839
7840         if (map == NULL) {
7841                 pr_warn("invalid map pointer\n");
7842                 return libbpf_err(-EINVAL);
7843         }
7844
7845         if (map->pin_path) {
7846                 if (path && strcmp(path, map->pin_path)) {
7847                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7848                                 bpf_map__name(map), map->pin_path, path);
7849                         return libbpf_err(-EINVAL);
7850                 } else if (map->pinned) {
7851                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
7852                                  bpf_map__name(map), map->pin_path);
7853                         return 0;
7854                 }
7855         } else {
7856                 if (!path) {
7857                         pr_warn("missing a path to pin map '%s' at\n",
7858                                 bpf_map__name(map));
7859                         return libbpf_err(-EINVAL);
7860                 } else if (map->pinned) {
7861                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
7862                         return libbpf_err(-EEXIST);
7863                 }
7864
7865                 map->pin_path = strdup(path);
7866                 if (!map->pin_path) {
7867                         err = -errno;
7868                         goto out_err;
7869                 }
7870         }
7871
7872         err = make_parent_dir(map->pin_path);
7873         if (err)
7874                 return libbpf_err(err);
7875
7876         err = check_path(map->pin_path);
7877         if (err)
7878                 return libbpf_err(err);
7879
7880         if (bpf_obj_pin(map->fd, map->pin_path)) {
7881                 err = -errno;
7882                 goto out_err;
7883         }
7884
7885         map->pinned = true;
7886         pr_debug("pinned map '%s'\n", map->pin_path);
7887
7888         return 0;
7889
7890 out_err:
7891         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7892         pr_warn("failed to pin map: %s\n", cp);
7893         return libbpf_err(err);
7894 }
7895
7896 int bpf_map__unpin(struct bpf_map *map, const char *path)
7897 {
7898         int err;
7899
7900         if (map == NULL) {
7901                 pr_warn("invalid map pointer\n");
7902                 return libbpf_err(-EINVAL);
7903         }
7904
7905         if (map->pin_path) {
7906                 if (path && strcmp(path, map->pin_path)) {
7907                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
7908                                 bpf_map__name(map), map->pin_path, path);
7909                         return libbpf_err(-EINVAL);
7910                 }
7911                 path = map->pin_path;
7912         } else if (!path) {
7913                 pr_warn("no path to unpin map '%s' from\n",
7914                         bpf_map__name(map));
7915                 return libbpf_err(-EINVAL);
7916         }
7917
7918         err = check_path(path);
7919         if (err)
7920                 return libbpf_err(err);
7921
7922         err = unlink(path);
7923         if (err != 0)
7924                 return libbpf_err(-errno);
7925
7926         map->pinned = false;
7927         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
7928
7929         return 0;
7930 }
7931
7932 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
7933 {
7934         char *new = NULL;
7935
7936         if (path) {
7937                 new = strdup(path);
7938                 if (!new)
7939                         return libbpf_err(-errno);
7940         }
7941
7942         free(map->pin_path);
7943         map->pin_path = new;
7944         return 0;
7945 }
7946
7947 __alias(bpf_map__pin_path)
7948 const char *bpf_map__get_pin_path(const struct bpf_map *map);
7949
7950 const char *bpf_map__pin_path(const struct bpf_map *map)
7951 {
7952         return map->pin_path;
7953 }
7954
7955 bool bpf_map__is_pinned(const struct bpf_map *map)
7956 {
7957         return map->pinned;
7958 }
7959
7960 static void sanitize_pin_path(char *s)
7961 {
7962         /* bpffs disallows periods in path names */
7963         while (*s) {
7964                 if (*s == '.')
7965                         *s = '_';
7966                 s++;
7967         }
7968 }
7969
7970 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
7971 {
7972         struct bpf_map *map;
7973         int err;
7974
7975         if (!obj)
7976                 return libbpf_err(-ENOENT);
7977
7978         if (!obj->loaded) {
7979                 pr_warn("object not yet loaded; load it first\n");
7980                 return libbpf_err(-ENOENT);
7981         }
7982
7983         bpf_object__for_each_map(map, obj) {
7984                 char *pin_path = NULL;
7985                 char buf[PATH_MAX];
7986
7987                 if (!map->autocreate)
7988                         continue;
7989
7990                 if (path) {
7991                         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
7992                         if (err)
7993                                 goto err_unpin_maps;
7994                         sanitize_pin_path(buf);
7995                         pin_path = buf;
7996                 } else if (!map->pin_path) {
7997                         continue;
7998                 }
7999
8000                 err = bpf_map__pin(map, pin_path);
8001                 if (err)
8002                         goto err_unpin_maps;
8003         }
8004
8005         return 0;
8006
8007 err_unpin_maps:
8008         while ((map = bpf_object__prev_map(obj, map))) {
8009                 if (!map->pin_path)
8010                         continue;
8011
8012                 bpf_map__unpin(map, NULL);
8013         }
8014
8015         return libbpf_err(err);
8016 }
8017
8018 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8019 {
8020         struct bpf_map *map;
8021         int err;
8022
8023         if (!obj)
8024                 return libbpf_err(-ENOENT);
8025
8026         bpf_object__for_each_map(map, obj) {
8027                 char *pin_path = NULL;
8028                 char buf[PATH_MAX];
8029
8030                 if (path) {
8031                         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8032                         if (err)
8033                                 return libbpf_err(err);
8034                         sanitize_pin_path(buf);
8035                         pin_path = buf;
8036                 } else if (!map->pin_path) {
8037                         continue;
8038                 }
8039
8040                 err = bpf_map__unpin(map, pin_path);
8041                 if (err)
8042                         return libbpf_err(err);
8043         }
8044
8045         return 0;
8046 }
8047
8048 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8049 {
8050         struct bpf_program *prog;
8051         char buf[PATH_MAX];
8052         int err;
8053
8054         if (!obj)
8055                 return libbpf_err(-ENOENT);
8056
8057         if (!obj->loaded) {
8058                 pr_warn("object not yet loaded; load it first\n");
8059                 return libbpf_err(-ENOENT);
8060         }
8061
8062         bpf_object__for_each_program(prog, obj) {
8063                 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8064                 if (err)
8065                         goto err_unpin_programs;
8066
8067                 err = bpf_program__pin(prog, buf);
8068                 if (err)
8069                         goto err_unpin_programs;
8070         }
8071
8072         return 0;
8073
8074 err_unpin_programs:
8075         while ((prog = bpf_object__prev_program(obj, prog))) {
8076                 if (pathname_concat(buf, sizeof(buf), path, prog->name))
8077                         continue;
8078
8079                 bpf_program__unpin(prog, buf);
8080         }
8081
8082         return libbpf_err(err);
8083 }
8084
8085 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8086 {
8087         struct bpf_program *prog;
8088         int err;
8089
8090         if (!obj)
8091                 return libbpf_err(-ENOENT);
8092
8093         bpf_object__for_each_program(prog, obj) {
8094                 char buf[PATH_MAX];
8095
8096                 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8097                 if (err)
8098                         return libbpf_err(err);
8099
8100                 err = bpf_program__unpin(prog, buf);
8101                 if (err)
8102                         return libbpf_err(err);
8103         }
8104
8105         return 0;
8106 }
8107
8108 int bpf_object__pin(struct bpf_object *obj, const char *path)
8109 {
8110         int err;
8111
8112         err = bpf_object__pin_maps(obj, path);
8113         if (err)
8114                 return libbpf_err(err);
8115
8116         err = bpf_object__pin_programs(obj, path);
8117         if (err) {
8118                 bpf_object__unpin_maps(obj, path);
8119                 return libbpf_err(err);
8120         }
8121
8122         return 0;
8123 }
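
/*
 * Usage sketch (editorial, not part of libbpf): pinning a loaded object's
 * maps and programs under a bpffs directory in one call. Object file name
 * and pin directory are hypothetical; bpffs is assumed mounted at
 * /sys/fs/bpf. Guarded out so the listing itself stays compilable.
 */
#if 0
#include <bpf/libbpf.h>

static int pin_all(void)
{
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open_file("minimal.bpf.o", NULL); /* hypothetical */
	err = libbpf_get_error(obj);
	if (err)
		return err;

	err = bpf_object__load(obj);
	if (!err)
		/* pins maps at <path>/<map_name>, progs at <path>/<prog_name>;
		 * on partial failure, already-pinned maps are unpinned again
		 */
		err = bpf_object__pin(obj, "/sys/fs/bpf/minimal");

	bpf_object__close(obj);
	return err;
}
#endif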
8124
8125 static void bpf_map__destroy(struct bpf_map *map)
8126 {
8127         if (map->inner_map) {
8128                 bpf_map__destroy(map->inner_map);
8129                 zfree(&map->inner_map);
8130         }
8131
8132         zfree(&map->init_slots);
8133         map->init_slots_sz = 0;
8134
8135         if (map->mmaped) {
8136                 munmap(map->mmaped, bpf_map_mmap_sz(map));
8137                 map->mmaped = NULL;
8138         }
8139
8140         if (map->st_ops) {
8141                 zfree(&map->st_ops->data);
8142                 zfree(&map->st_ops->progs);
8143                 zfree(&map->st_ops->kern_func_off);
8144                 zfree(&map->st_ops);
8145         }
8146
8147         zfree(&map->name);
8148         zfree(&map->real_name);
8149         zfree(&map->pin_path);
8150
8151         if (map->fd >= 0)
8152                 zclose(map->fd);
8153 }
8154
8155 void bpf_object__close(struct bpf_object *obj)
8156 {
8157         size_t i;
8158
8159         if (IS_ERR_OR_NULL(obj))
8160                 return;
8161
8162         usdt_manager_free(obj->usdt_man);
8163         obj->usdt_man = NULL;
8164
8165         bpf_gen__free(obj->gen_loader);
8166         bpf_object__elf_finish(obj);
8167         bpf_object_unload(obj);
8168         btf__free(obj->btf);
8169         btf_ext__free(obj->btf_ext);
8170
8171         for (i = 0; i < obj->nr_maps; i++)
8172                 bpf_map__destroy(&obj->maps[i]);
8173
8174         zfree(&obj->btf_custom_path);
8175         zfree(&obj->kconfig);
8176         zfree(&obj->externs);
8177         obj->nr_extern = 0;
8178
8179         zfree(&obj->maps);
8180         obj->nr_maps = 0;
8181
8182         if (obj->programs && obj->nr_programs) {
8183                 for (i = 0; i < obj->nr_programs; i++)
8184                         bpf_program__exit(&obj->programs[i]);
8185         }
8186         zfree(&obj->programs);
8187
8188         free(obj);
8189 }
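
/*
 * Editorial sketch: bpf_object__close() tolerates NULL and encoded error
 * pointers (the IS_ERR_OR_NULL check above), so one cleanup path can close
 * the object unconditionally. Object file name is hypothetical.
 */
#if 0
#include <bpf/libbpf.h>

static int open_load_close(void)
{
	struct bpf_object *obj = bpf_object__open("prog.bpf.o");
	int err = libbpf_get_error(obj);

	if (!err)
		err = bpf_object__load(obj);

	bpf_object__close(obj); /* safe even on open failure */
	return err;
}
#endif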
8190
8191 const char *bpf_object__name(const struct bpf_object *obj)
8192 {
8193         return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8194 }
8195
8196 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8197 {
8198         return obj ? obj->kern_version : 0;
8199 }
8200
8201 struct btf *bpf_object__btf(const struct bpf_object *obj)
8202 {
8203         return obj ? obj->btf : NULL;
8204 }
8205
8206 int bpf_object__btf_fd(const struct bpf_object *obj)
8207 {
8208         return obj->btf ? btf__fd(obj->btf) : -1;
8209 }
8210
8211 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
8212 {
8213         if (obj->loaded)
8214                 return libbpf_err(-EINVAL);
8215
8216         obj->kern_version = kern_version;
8217
8218         return 0;
8219 }
8220
8221 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8222 {
8223         struct bpf_gen *gen;
8224
8225         if (!opts)
8226                 return -EFAULT;
8227         if (!OPTS_VALID(opts, gen_loader_opts))
8228                 return -EINVAL;
8229         gen = calloc(1, sizeof(*gen));
8230         if (!gen)
8231                 return -ENOMEM;
8232         gen->opts = opts;
8233         obj->gen_loader = gen;
8234         return 0;
8235 }
8236
8237 static struct bpf_program *
8238 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8239                     bool forward)
8240 {
8241         size_t nr_programs = obj->nr_programs;
8242         ssize_t idx;
8243
8244         if (!nr_programs)
8245                 return NULL;
8246
8247         if (!p)
8248                 /* Iterate from the beginning */
8249                 return forward ? &obj->programs[0] :
8250                         &obj->programs[nr_programs - 1];
8251
8252         if (p->obj != obj) {
8253                 pr_warn("error: program handler doesn't match object\n");
8254                 return errno = EINVAL, NULL;
8255         }
8256
8257         idx = (p - obj->programs) + (forward ? 1 : -1);
8258         if (idx >= obj->nr_programs || idx < 0)
8259                 return NULL;
8260         return &obj->programs[idx];
8261 }
8262
8263 struct bpf_program *
8264 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
8265 {
8266         struct bpf_program *prog = prev;
8267
8268         do {
8269                 prog = __bpf_program__iter(prog, obj, true);
8270         } while (prog && prog_is_subprog(obj, prog));
8271
8272         return prog;
8273 }
8274
8275 struct bpf_program *
8276 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
8277 {
8278         struct bpf_program *prog = next;
8279
8280         do {
8281                 prog = __bpf_program__iter(prog, obj, false);
8282         } while (prog && prog_is_subprog(obj, prog));
8283
8284         return prog;
8285 }
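
/*
 * Editorial sketch: both iterators above skip subprograms, so only
 * entry-point programs are visited. The bpf_object__for_each_program()
 * macro from libbpf.h wraps the forward iterator in the same way.
 */
#if 0
#include <stdio.h>
#include <bpf/libbpf.h>

static void list_progs(const struct bpf_object *obj)
{
	struct bpf_program *prog = NULL;

	while ((prog = bpf_object__next_program(obj, prog)))
		printf("prog '%s' in section '%s'\n",
		       bpf_program__name(prog),
		       bpf_program__section_name(prog));
}
#endif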
8286
8287 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8288 {
8289         prog->prog_ifindex = ifindex;
8290 }
8291
8292 const char *bpf_program__name(const struct bpf_program *prog)
8293 {
8294         return prog->name;
8295 }
8296
8297 const char *bpf_program__section_name(const struct bpf_program *prog)
8298 {
8299         return prog->sec_name;
8300 }
8301
8302 bool bpf_program__autoload(const struct bpf_program *prog)
8303 {
8304         return prog->autoload;
8305 }
8306
8307 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8308 {
8309         if (prog->obj->loaded)
8310                 return libbpf_err(-EINVAL);
8311
8312         prog->autoload = autoload;
8313         return 0;
8314 }
8315
8316 bool bpf_program__autoattach(const struct bpf_program *prog)
8317 {
8318         return prog->autoattach;
8319 }
8320
8321 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
8322 {
8323         prog->autoattach = autoattach;
8324 }
8325
8326 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
8327 {
8328         return prog->insns;
8329 }
8330
8331 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
8332 {
8333         return prog->insns_cnt;
8334 }
8335
8336 int bpf_program__set_insns(struct bpf_program *prog,
8337                            struct bpf_insn *new_insns, size_t new_insn_cnt)
8338 {
8339         struct bpf_insn *insns;
8340
8341         if (prog->obj->loaded)
8342                 return -EBUSY;
8343
8344         insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
8345         if (!insns) {
8346                 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
8347                 return -ENOMEM;
8348         }
8349         memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
8350
8351         prog->insns = insns;
8352         prog->insns_cnt = new_insn_cnt;
8353         return 0;
8354 }
8355
8356 int bpf_program__fd(const struct bpf_program *prog)
8357 {
8358         if (!prog)
8359                 return libbpf_err(-EINVAL);
8360
8361         if (prog->fd < 0)
8362                 return libbpf_err(-ENOENT);
8363
8364         return prog->fd;
8365 }
8366
8367 __alias(bpf_program__type)
8368 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
8369
8370 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
8371 {
8372         return prog->type;
8373 }
8374
8375 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
8376 {
8377         if (prog->obj->loaded)
8378                 return libbpf_err(-EBUSY);
8379
8380         prog->type = type;
8381         return 0;
8382 }
8383
8384 __alias(bpf_program__expected_attach_type)
8385 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
8386
8387 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
8388 {
8389         return prog->expected_attach_type;
8390 }
8391
8392 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
8393                                            enum bpf_attach_type type)
8394 {
8395         if (prog->obj->loaded)
8396                 return libbpf_err(-EBUSY);
8397
8398         prog->expected_attach_type = type;
8399         return 0;
8400 }
8401
8402 __u32 bpf_program__flags(const struct bpf_program *prog)
8403 {
8404         return prog->prog_flags;
8405 }
8406
8407 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
8408 {
8409         if (prog->obj->loaded)
8410                 return libbpf_err(-EBUSY);
8411
8412         prog->prog_flags = flags;
8413         return 0;
8414 }
8415
8416 __u32 bpf_program__log_level(const struct bpf_program *prog)
8417 {
8418         return prog->log_level;
8419 }
8420
8421 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
8422 {
8423         if (prog->obj->loaded)
8424                 return libbpf_err(-EBUSY);
8425
8426         prog->log_level = log_level;
8427         return 0;
8428 }
8429
8430 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
8431 {
8432         *log_size = prog->log_size;
8433         return prog->log_buf;
8434 }
8435
8436 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
8437 {
8438         if (log_size && !log_buf)
8439                 return -EINVAL;
8440         if (log_size > UINT_MAX)
8441                 return -EINVAL;
8442         if (prog->obj->loaded)
8443                 return -EBUSY;
8444
8445         prog->log_buf = log_buf;
8446         prog->log_size = log_size;
8447         return 0;
8448 }
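
/*
 * Editorial sketch: capturing verifier output into a caller-owned buffer.
 * Both setters must run before bpf_object__load(); the buffer size is an
 * arbitrary choice here.
 */
#if 0
#include <bpf/libbpf.h>

static char verifier_log[1024 * 1024];

static void enable_verifier_log(struct bpf_program *prog)
{
	bpf_program__set_log_buf(prog, verifier_log, sizeof(verifier_log));
	bpf_program__set_log_level(prog, 1); /* 1: basic; 2: instruction-level */
}
#endif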
8449
8450 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {                        \
8451         .sec = (char *)sec_pfx,                                             \
8452         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
8453         .expected_attach_type = atype,                                      \
8454         .cookie = (long)(flags),                                            \
8455         .prog_prepare_load_fn = libbpf_prepare_prog_load,                   \
8456         __VA_ARGS__                                                         \
8457 }
8458
8459 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8460 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8461 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8462 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8463 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8464 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8465 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8466 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8467 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8468 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8469
8470 static const struct bpf_sec_def section_defs[] = {
8471         SEC_DEF("socket",               SOCKET_FILTER, 0, SEC_NONE),
8472         SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
8473         SEC_DEF("sk_reuseport",         SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
8474         SEC_DEF("kprobe+",              KPROBE, 0, SEC_NONE, attach_kprobe),
8475         SEC_DEF("uprobe+",              KPROBE, 0, SEC_NONE, attach_uprobe),
8476         SEC_DEF("uprobe.s+",            KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8477         SEC_DEF("kretprobe+",           KPROBE, 0, SEC_NONE, attach_kprobe),
8478         SEC_DEF("uretprobe+",           KPROBE, 0, SEC_NONE, attach_uprobe),
8479         SEC_DEF("uretprobe.s+",         KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8480         SEC_DEF("kprobe.multi+",        KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8481         SEC_DEF("kretprobe.multi+",     KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8482         SEC_DEF("ksyscall+",            KPROBE, 0, SEC_NONE, attach_ksyscall),
8483         SEC_DEF("kretsyscall+",         KPROBE, 0, SEC_NONE, attach_ksyscall),
8484         SEC_DEF("usdt+",                KPROBE, 0, SEC_NONE, attach_usdt),
8485         SEC_DEF("tc",                   SCHED_CLS, 0, SEC_NONE),
8486         SEC_DEF("classifier",           SCHED_CLS, 0, SEC_NONE),
8487         SEC_DEF("action",               SCHED_ACT, 0, SEC_NONE),
8488         SEC_DEF("tracepoint+",          TRACEPOINT, 0, SEC_NONE, attach_tp),
8489         SEC_DEF("tp+",                  TRACEPOINT, 0, SEC_NONE, attach_tp),
8490         SEC_DEF("raw_tracepoint+",      RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8491         SEC_DEF("raw_tp+",              RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8492         SEC_DEF("raw_tracepoint.w+",    RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8493         SEC_DEF("raw_tp.w+",            RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8494         SEC_DEF("tp_btf+",              TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
8495         SEC_DEF("fentry+",              TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
8496         SEC_DEF("fmod_ret+",            TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
8497         SEC_DEF("fexit+",               TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
8498         SEC_DEF("fentry.s+",            TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8499         SEC_DEF("fmod_ret.s+",          TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8500         SEC_DEF("fexit.s+",             TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8501         SEC_DEF("freplace+",            EXT, 0, SEC_ATTACH_BTF, attach_trace),
8502         SEC_DEF("lsm+",                 LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
8503         SEC_DEF("lsm.s+",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
8504         SEC_DEF("lsm_cgroup+",          LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
8505         SEC_DEF("iter+",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
8506         SEC_DEF("iter.s+",              TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
8507         SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
8508         SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
8509         SEC_DEF("xdp/devmap",           XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
8510         SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
8511         SEC_DEF("xdp/cpumap",           XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
8512         SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
8513         SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
8514         SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE),
8515         SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE),
8516         SEC_DEF("lwt_out",              LWT_OUT, 0, SEC_NONE),
8517         SEC_DEF("lwt_xmit",             LWT_XMIT, 0, SEC_NONE),
8518         SEC_DEF("lwt_seg6local",        LWT_SEG6LOCAL, 0, SEC_NONE),
8519         SEC_DEF("sockops",              SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
8520         SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
8521         SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
8522         SEC_DEF("sk_skb",               SK_SKB, 0, SEC_NONE),
8523         SEC_DEF("sk_msg",               SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
8524         SEC_DEF("lirc_mode2",           LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
8525         SEC_DEF("flow_dissector",       FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
8526         SEC_DEF("cgroup_skb/ingress",   CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
8527         SEC_DEF("cgroup_skb/egress",    CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
8528         SEC_DEF("cgroup/skb",           CGROUP_SKB, 0, SEC_NONE),
8529         SEC_DEF("cgroup/sock_create",   CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
8530         SEC_DEF("cgroup/sock_release",  CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
8531         SEC_DEF("cgroup/sock",          CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
8532         SEC_DEF("cgroup/post_bind4",    CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
8533         SEC_DEF("cgroup/post_bind6",    CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
8534         SEC_DEF("cgroup/bind4",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
8535         SEC_DEF("cgroup/bind6",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
8536         SEC_DEF("cgroup/connect4",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
8537         SEC_DEF("cgroup/connect6",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
8538         SEC_DEF("cgroup/sendmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
8539         SEC_DEF("cgroup/sendmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
8540         SEC_DEF("cgroup/recvmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
8541         SEC_DEF("cgroup/recvmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
8542         SEC_DEF("cgroup/getpeername4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
8543         SEC_DEF("cgroup/getpeername6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
8544         SEC_DEF("cgroup/getsockname4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
8545         SEC_DEF("cgroup/getsockname6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
8546         SEC_DEF("cgroup/sysctl",        CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
8547         SEC_DEF("cgroup/getsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
8548         SEC_DEF("cgroup/setsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
8549         SEC_DEF("cgroup/dev",           CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
8550         SEC_DEF("struct_ops+",          STRUCT_OPS, 0, SEC_NONE),
8551         SEC_DEF("sk_lookup",            SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
8552 };
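
/*
 * Editorial sketch: how the section_defs[] table is consumed from the BPF
 * side. SEC() annotations select the row whose name matches; "+" entries
 * also accept a "/extras" suffix. SEC()/BPF_KPROBE()/BPF_PROG() come from
 * bpf_helpers.h/bpf_tracing.h; function and target names are illustrative.
 */
#if 0
SEC("kprobe/do_unlinkat")    /* matches "kprobe+": KPROBE prog, auto-attach */
int BPF_KPROBE(handle_unlink) { return 0; }

SEC("xdp")                   /* matches "xdp": XDP prog, BPF_XDP attach type */
int xdp_pass(struct xdp_md *ctx) { return XDP_PASS; }

SEC("tp_btf/sched_switch")   /* matches "tp_btf+": TRACING, BPF_TRACE_RAW_TP */
int BPF_PROG(on_switch) { return 0; }
#endif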
8553
8554 static size_t custom_sec_def_cnt;
8555 static struct bpf_sec_def *custom_sec_defs;
8556 static struct bpf_sec_def custom_fallback_def;
8557 static bool has_custom_fallback_def;
8558
8559 static int last_custom_sec_def_handler_id;
8560
8561 int libbpf_register_prog_handler(const char *sec,
8562                                  enum bpf_prog_type prog_type,
8563                                  enum bpf_attach_type exp_attach_type,
8564                                  const struct libbpf_prog_handler_opts *opts)
8565 {
8566         struct bpf_sec_def *sec_def;
8567
8568         if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
8569                 return libbpf_err(-EINVAL);
8570
8571         if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
8572                 return libbpf_err(-E2BIG);
8573
8574         if (sec) {
8575                 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
8576                                               sizeof(*sec_def));
8577                 if (!sec_def)
8578                         return libbpf_err(-ENOMEM);
8579
8580                 custom_sec_defs = sec_def;
8581                 sec_def = &custom_sec_defs[custom_sec_def_cnt];
8582         } else {
8583                 if (has_custom_fallback_def)
8584                         return libbpf_err(-EBUSY);
8585
8586                 sec_def = &custom_fallback_def;
8587         }
8588
8589         sec_def->sec = sec ? strdup(sec) : NULL;
8590         if (sec && !sec_def->sec)
8591                 return libbpf_err(-ENOMEM);
8592
8593         sec_def->prog_type = prog_type;
8594         sec_def->expected_attach_type = exp_attach_type;
8595         sec_def->cookie = OPTS_GET(opts, cookie, 0);
8596
8597         sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
8598         sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
8599         sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
8600
8601         sec_def->handler_id = ++last_custom_sec_def_handler_id;
8602
8603         if (sec)
8604                 custom_sec_def_cnt++;
8605         else
8606                 has_custom_fallback_def = true;
8607
8608         return sec_def->handler_id;
8609 }
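
/*
 * Editorial sketch: registering a custom section handler so that
 * bpf_object__open() recognizes SEC("my_sec") or SEC("my_sec/..."). The
 * section name, program type, and cookie are hypothetical.
 */
#if 0
#include <bpf/libbpf.h>

static int register_my_sec(void)
{
	LIBBPF_OPTS(libbpf_prog_handler_opts, opts, .cookie = 0);

	/* returns a positive handler ID to pass to
	 * libbpf_unregister_prog_handler(), or a negative error
	 */
	return libbpf_register_prog_handler("my_sec+", BPF_PROG_TYPE_KPROBE,
					    0 /* exp. attach type */, &opts);
}
#endif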
8610
8611 int libbpf_unregister_prog_handler(int handler_id)
8612 {
8613         struct bpf_sec_def *sec_defs;
8614         int i;
8615
8616         if (handler_id <= 0)
8617                 return libbpf_err(-EINVAL);
8618
8619         if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
8620                 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
8621                 has_custom_fallback_def = false;
8622                 return 0;
8623         }
8624
8625         for (i = 0; i < custom_sec_def_cnt; i++) {
8626                 if (custom_sec_defs[i].handler_id == handler_id)
8627                         break;
8628         }
8629
8630         if (i == custom_sec_def_cnt)
8631                 return libbpf_err(-ENOENT);
8632
8633         free(custom_sec_defs[i].sec);
8634         for (i = i + 1; i < custom_sec_def_cnt; i++)
8635                 custom_sec_defs[i - 1] = custom_sec_defs[i];
8636         custom_sec_def_cnt--;
8637
8638         /* try to shrink the array, but it's ok if we couldn't */
8639         sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
8640         if (sec_defs)
8641                 custom_sec_defs = sec_defs;
8642
8643         return 0;
8644 }
8645
8646 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
8647 {
8648         size_t len = strlen(sec_def->sec);
8649
8650         /* "type/" always has to have proper SEC("type/extras") form */
8651         if (sec_def->sec[len - 1] == '/') {
8652                 if (str_has_pfx(sec_name, sec_def->sec))
8653                         return true;
8654                 return false;
8655         }
8656
8657         /* "type+" means it can be either exact SEC("type") or
8658          * well-formed SEC("type/extras") with proper '/' separator
8659          */
8660         if (sec_def->sec[len - 1] == '+') {
8661                 len--;
8662                 /* not even a prefix */
8663                 if (strncmp(sec_name, sec_def->sec, len) != 0)
8664                         return false;
8665                 /* exact match or has '/' separator */
8666                 if (sec_name[len] == '\0' || sec_name[len] == '/')
8667                         return true;
8668                 return false;
8669         }
8670
8671         return strcmp(sec_name, sec_def->sec) == 0;
8672 }
8673
8674 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
8675 {
8676         const struct bpf_sec_def *sec_def;
8677         int i, n;
8678
8679         n = custom_sec_def_cnt;
8680         for (i = 0; i < n; i++) {
8681                 sec_def = &custom_sec_defs[i];
8682                 if (sec_def_matches(sec_def, sec_name))
8683                         return sec_def;
8684         }
8685
8686         n = ARRAY_SIZE(section_defs);
8687         for (i = 0; i < n; i++) {
8688                 sec_def = &section_defs[i];
8689                 if (sec_def_matches(sec_def, sec_name))
8690                         return sec_def;
8691         }
8692
8693         if (has_custom_fallback_def)
8694                 return &custom_fallback_def;
8695
8696         return NULL;
8697 }
8698
8699 #define MAX_TYPE_NAME_SIZE 32
8700
8701 static char *libbpf_get_type_names(bool attach_type)
8702 {
8703         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
8704         char *buf;
8705
8706         buf = malloc(len);
8707         if (!buf)
8708                 return NULL;
8709
8710         buf[0] = '\0';
8711         /* Build a string buffer with all available names */
8712         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8713                 const struct bpf_sec_def *sec_def = &section_defs[i];
8714
8715                 if (attach_type) {
8716                         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
8717                                 continue;
8718
8719                         if (!(sec_def->cookie & SEC_ATTACHABLE))
8720                                 continue;
8721                 }
8722
8723                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
8724                         free(buf);
8725                         return NULL;
8726                 }
8727                 strcat(buf, " ");
8728                 strcat(buf, section_defs[i].sec);
8729         }
8730
8731         return buf;
8732 }
8733
8734 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
8735                              enum bpf_attach_type *expected_attach_type)
8736 {
8737         const struct bpf_sec_def *sec_def;
8738         char *type_names;
8739
8740         if (!name)
8741                 return libbpf_err(-EINVAL);
8742
8743         sec_def = find_sec_def(name);
8744         if (sec_def) {
8745                 *prog_type = sec_def->prog_type;
8746                 *expected_attach_type = sec_def->expected_attach_type;
8747                 return 0;
8748         }
8749
8750         pr_debug("failed to guess program type from ELF section '%s'\n", name);
8751         type_names = libbpf_get_type_names(false);
8752         if (type_names != NULL) {
8753                 pr_debug("supported section(type) names are:%s\n", type_names);
8754                 free(type_names);
8755         }
8756
8757         return libbpf_err(-ESRCH);
8758 }
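
/*
 * Editorial sketch: resolving program and attach type from an ELF section
 * name without opening an object, using the lookup above plus the
 * stringifiers defined below.
 */
#if 0
#include <stdio.h>
#include <bpf/libbpf.h>

static void guess_types(void)
{
	enum bpf_prog_type ptype;
	enum bpf_attach_type atype;

	if (!libbpf_prog_type_by_name("cgroup/connect4", &ptype, &atype))
		printf("prog type %s, attach type %s\n",
		       libbpf_bpf_prog_type_str(ptype),
		       libbpf_bpf_attach_type_str(atype));
}
#endif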
8759
8760 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
8761 {
8762         if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
8763                 return NULL;
8764
8765         return attach_type_name[t];
8766 }
8767
8768 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
8769 {
8770         if (t < 0 || t >= ARRAY_SIZE(link_type_name))
8771                 return NULL;
8772
8773         return link_type_name[t];
8774 }
8775
8776 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
8777 {
8778         if (t < 0 || t >= ARRAY_SIZE(map_type_name))
8779                 return NULL;
8780
8781         return map_type_name[t];
8782 }
8783
8784 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
8785 {
8786         if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
8787                 return NULL;
8788
8789         return prog_type_name[t];
8790 }
8791
8792 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
8793                                                      size_t offset)
8794 {
8795         struct bpf_map *map;
8796         size_t i;
8797
8798         for (i = 0; i < obj->nr_maps; i++) {
8799                 map = &obj->maps[i];
8800                 if (!bpf_map__is_struct_ops(map))
8801                         continue;
8802                 if (map->sec_offset <= offset &&
8803                     offset - map->sec_offset < map->def.value_size)
8804                         return map;
8805         }
8806
8807         return NULL;
8808 }
8809
8810 /* Collect the reloc from ELF and populate the st_ops->progs[] */
8811 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
8812                                             Elf64_Shdr *shdr, Elf_Data *data)
8813 {
8814         const struct btf_member *member;
8815         struct bpf_struct_ops *st_ops;
8816         struct bpf_program *prog;
8817         unsigned int shdr_idx;
8818         const struct btf *btf;
8819         struct bpf_map *map;
8820         unsigned int moff, insn_idx;
8821         const char *name;
8822         __u32 member_idx;
8823         Elf64_Sym *sym;
8824         Elf64_Rel *rel;
8825         int i, nrels;
8826
8827         btf = obj->btf;
8828         nrels = shdr->sh_size / shdr->sh_entsize;
8829         for (i = 0; i < nrels; i++) {
8830                 rel = elf_rel_by_idx(data, i);
8831                 if (!rel) {
8832                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
8833                         return -LIBBPF_ERRNO__FORMAT;
8834                 }
8835
8836                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
8837                 if (!sym) {
8838                         pr_warn("struct_ops reloc: symbol %zx not found\n",
8839                                 (size_t)ELF64_R_SYM(rel->r_info));
8840                         return -LIBBPF_ERRNO__FORMAT;
8841                 }
8842
8843                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
8844                 map = find_struct_ops_map_by_offset(obj, rel->r_offset);
8845                 if (!map) {
8846                         pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
8847                                 (size_t)rel->r_offset);
8848                         return -EINVAL;
8849                 }
8850
8851                 moff = rel->r_offset - map->sec_offset;
8852                 shdr_idx = sym->st_shndx;
8853                 st_ops = map->st_ops;
8854                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
8855                          map->name,
8856                          (long long)(rel->r_info >> 32),
8857                          (long long)sym->st_value,
8858                          shdr_idx, (size_t)rel->r_offset,
8859                          map->sec_offset, sym->st_name, name);
8860
8861                 if (shdr_idx >= SHN_LORESERVE) {
8862                         pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
8863                                 map->name, (size_t)rel->r_offset, shdr_idx);
8864                         return -LIBBPF_ERRNO__RELOC;
8865                 }
8866                 if (sym->st_value % BPF_INSN_SZ) {
8867                         pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
8868                                 map->name, (unsigned long long)sym->st_value);
8869                         return -LIBBPF_ERRNO__FORMAT;
8870                 }
8871                 insn_idx = sym->st_value / BPF_INSN_SZ;
8872
8873                 member = find_member_by_offset(st_ops->type, moff * 8);
8874                 if (!member) {
8875                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
8876                                 map->name, moff);
8877                         return -EINVAL;
8878                 }
8879                 member_idx = member - btf_members(st_ops->type);
8880                 name = btf__name_by_offset(btf, member->name_off);
8881
8882                 if (!resolve_func_ptr(btf, member->type, NULL)) {
8883                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
8884                                 map->name, name);
8885                         return -EINVAL;
8886                 }
8887
8888                 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
8889                 if (!prog) {
8890                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
8891                                 map->name, shdr_idx, name);
8892                         return -EINVAL;
8893                 }
8894
8895                 /* prevent the use of BPF prog with invalid type */
8896                 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
8897                         pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
8898                                 map->name, prog->name);
8899                         return -EINVAL;
8900                 }
8901
8902                 /* if we haven't yet processed this BPF program, record proper
8903                  * attach_btf_id and member_idx
8904                  */
8905                 if (!prog->attach_btf_id) {
8906                         prog->attach_btf_id = st_ops->type_id;
8907                         prog->expected_attach_type = member_idx;
8908                 }
8909
8910                 /* struct_ops BPF prog can be re-used between multiple
8911                  * .struct_ops as long as it's the same struct_ops struct
8912                  * definition and the same function pointer field
8913                  */
8914                 if (prog->attach_btf_id != st_ops->type_id ||
8915                     prog->expected_attach_type != member_idx) {
8916                         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
8917                                 map->name, prog->name, prog->sec_name, prog->type,
8918                                 prog->attach_btf_id, prog->expected_attach_type, name);
8919                         return -EINVAL;
8920                 }
8921
8922                 st_ops->progs[member_idx] = prog;
8923         }
8924
8925         return 0;
8926 }
8927
8928 #define BTF_TRACE_PREFIX "btf_trace_"
8929 #define BTF_LSM_PREFIX "bpf_lsm_"
8930 #define BTF_ITER_PREFIX "bpf_iter_"
8931 #define BTF_MAX_NAME_SIZE 128
8932
8933 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
8934                                 const char **prefix, int *kind)
8935 {
8936         switch (attach_type) {
8937         case BPF_TRACE_RAW_TP:
8938                 *prefix = BTF_TRACE_PREFIX;
8939                 *kind = BTF_KIND_TYPEDEF;
8940                 break;
8941         case BPF_LSM_MAC:
8942         case BPF_LSM_CGROUP:
8943                 *prefix = BTF_LSM_PREFIX;
8944                 *kind = BTF_KIND_FUNC;
8945                 break;
8946         case BPF_TRACE_ITER:
8947                 *prefix = BTF_ITER_PREFIX;
8948                 *kind = BTF_KIND_FUNC;
8949                 break;
8950         default:
8951                 *prefix = "";
8952                 *kind = BTF_KIND_FUNC;
8953         }
8954 }
8955
8956 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
8957                                    const char *name, __u32 kind)
8958 {
8959         char btf_type_name[BTF_MAX_NAME_SIZE];
8960         int ret;
8961
8962         ret = snprintf(btf_type_name, sizeof(btf_type_name),
8963                        "%s%s", prefix, name);
8964         /* snprintf returns the number of characters that would have been
8965          * written, excluding the terminating null. So a return value
8966          * >= BTF_MAX_NAME_SIZE indicates truncation.
8967          */
8968         if (ret < 0 || ret >= sizeof(btf_type_name))
8969                 return -ENAMETOOLONG;
8970         return btf__find_by_name_kind(btf, btf_type_name, kind);
8971 }
8972
8973 static inline int find_attach_btf_id(struct btf *btf, const char *name,
8974                                      enum bpf_attach_type attach_type)
8975 {
8976         const char *prefix;
8977         int kind;
8978
8979         btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
8980         return find_btf_by_prefix_kind(btf, prefix, name, kind);
8981 }
8982
8983 int libbpf_find_vmlinux_btf_id(const char *name,
8984                                enum bpf_attach_type attach_type)
8985 {
8986         struct btf *btf;
8987         int err;
8988
8989         btf = btf__load_vmlinux_btf();
8990         err = libbpf_get_error(btf);
8991         if (err) {
8992                 pr_warn("vmlinux BTF is not found\n");
8993                 return libbpf_err(err);
8994         }
8995
8996         err = find_attach_btf_id(btf, name, attach_type);
8997         if (err <= 0)
8998                 pr_warn("%s is not found in vmlinux BTF\n", name);
8999
9000         btf__free(btf);
9001         return libbpf_err(err);
9002 }
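
/*
 * Editorial sketch: the prefix/kind mapping above in action. An fentry
 * target is looked up directly as a FUNC; a raw tracepoint name gets the
 * "btf_trace_" prefix and is looked up as a TYPEDEF.
 */
#if 0
#include <bpf/libbpf.h>

static void lookup_ids(void)
{
	/* FUNC "do_unlinkat" in vmlinux BTF */
	int fentry_id = libbpf_find_vmlinux_btf_id("do_unlinkat", BPF_TRACE_FENTRY);
	/* TYPEDEF "btf_trace_sched_switch" in vmlinux BTF */
	int raw_tp_id = libbpf_find_vmlinux_btf_id("sched_switch", BPF_TRACE_RAW_TP);

	(void)fentry_id;
	(void)raw_tp_id;
}
#endif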
9003
9004 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9005 {
9006         struct bpf_prog_info info;
9007         __u32 info_len = sizeof(info);
9008         struct btf *btf;
9009         int err;
9010
9011         memset(&info, 0, info_len);
9012         err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len);
9013         if (err) {
9014                 pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n",
9015                         attach_prog_fd, err);
9016                 return err;
9017         }
9018
9019         err = -EINVAL;
9020         if (!info.btf_id) {
9021                 pr_warn("The target program doesn't have BTF\n");
9022                 goto out;
9023         }
9024         btf = btf__load_from_kernel_by_id(info.btf_id);
9025         err = libbpf_get_error(btf);
9026         if (err) {
9027                 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
9028                 goto out;
9029         }
9030         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9031         btf__free(btf);
9032         if (err <= 0) {
9033                 pr_warn("%s is not found in prog's BTF\n", name);
9034                 goto out;
9035         }
9036 out:
9037         return err;
9038 }
9039
9040 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9041                               enum bpf_attach_type attach_type,
9042                               int *btf_obj_fd, int *btf_type_id)
9043 {
9044         int ret, i;
9045
9046         ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9047         if (ret > 0) {
9048                 *btf_obj_fd = 0; /* vmlinux BTF */
9049                 *btf_type_id = ret;
9050                 return 0;
9051         }
9052         if (ret != -ENOENT)
9053                 return ret;
9054
9055         ret = load_module_btfs(obj);
9056         if (ret)
9057                 return ret;
9058
9059         for (i = 0; i < obj->btf_module_cnt; i++) {
9060                 const struct module_btf *mod = &obj->btf_modules[i];
9061
9062                 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9063                 if (ret > 0) {
9064                         *btf_obj_fd = mod->fd;
9065                         *btf_type_id = ret;
9066                         return 0;
9067                 }
9068                 if (ret == -ENOENT)
9069                         continue;
9070
9071                 return ret;
9072         }
9073
9074         return -ESRCH;
9075 }
9076
9077 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
9078                                      int *btf_obj_fd, int *btf_type_id)
9079 {
9080         enum bpf_attach_type attach_type = prog->expected_attach_type;
9081         __u32 attach_prog_fd = prog->attach_prog_fd;
9082         int err = 0;
9083
9084         /* BPF program's BTF ID */
9085         if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
9086                 if (!attach_prog_fd) {
9087                         pr_warn("prog '%s': attach program FD is not set\n", prog->name);
9088                         return -EINVAL;
9089                 }
9090                 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9091                 if (err < 0) {
9092                         pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9093                                  prog->name, attach_prog_fd, attach_name, err);
9094                         return err;
9095                 }
9096                 *btf_obj_fd = 0;
9097                 *btf_type_id = err;
9098                 return 0;
9099         }
9100
9101         /* kernel/module BTF ID */
9102         if (prog->obj->gen_loader) {
9103                 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9104                 *btf_obj_fd = 0;
9105                 *btf_type_id = 1;
9106         } else {
9107                 err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
9108         }
9109         if (err) {
9110                 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
9111                         prog->name, attach_name, err);
9112                 return err;
9113         }
9114         return 0;
9115 }
9116
9117 int libbpf_attach_type_by_name(const char *name,
9118                                enum bpf_attach_type *attach_type)
9119 {
9120         char *type_names;
9121         const struct bpf_sec_def *sec_def;
9122
9123         if (!name)
9124                 return libbpf_err(-EINVAL);
9125
9126         sec_def = find_sec_def(name);
9127         if (!sec_def) {
9128                 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9129                 type_names = libbpf_get_type_names(true);
9130                 if (type_names != NULL) {
9131                         pr_debug("attachable section(type) names are:%s\n", type_names);
9132                         free(type_names);
9133                 }
9134
9135                 return libbpf_err(-EINVAL);
9136         }
9137
9138         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9139                 return libbpf_err(-EINVAL);
9140         if (!(sec_def->cookie & SEC_ATTACHABLE))
9141                 return libbpf_err(-EINVAL);
9142
9143         *attach_type = sec_def->expected_attach_type;
9144         return 0;
9145 }
9146
9147 int bpf_map__fd(const struct bpf_map *map)
9148 {
9149         return map ? map->fd : libbpf_err(-EINVAL);
9150 }
9151
9152 static bool map_uses_real_name(const struct bpf_map *map)
9153 {
9154         /* Since libbpf started to support custom .data.* and .rodata.* maps,
9155          * their user-visible name differs from kernel-visible name. Users see
9156          * such map's corresponding ELF section name as a map name.
9157          * This check distinguishes .data/.rodata from .data.* and .rodata.*
9158          * maps to know which name has to be returned to the user.
9159          */
9160         if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
9161                 return true;
9162         if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
9163                 return true;
9164         return false;
9165 }
9166
9167 const char *bpf_map__name(const struct bpf_map *map)
9168 {
9169         if (!map)
9170                 return NULL;
9171
9172         if (map_uses_real_name(map))
9173                 return map->real_name;
9174
9175         return map->name;
9176 }
9177
9178 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9179 {
9180         return map->def.type;
9181 }
9182
9183 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9184 {
9185         if (map->fd >= 0)
9186                 return libbpf_err(-EBUSY);
9187         map->def.type = type;
9188         return 0;
9189 }
9190
9191 __u32 bpf_map__map_flags(const struct bpf_map *map)
9192 {
9193         return map->def.map_flags;
9194 }
9195
9196 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9197 {
9198         if (map->fd >= 0)
9199                 return libbpf_err(-EBUSY);
9200         map->def.map_flags = flags;
9201         return 0;
9202 }
9203
9204 __u64 bpf_map__map_extra(const struct bpf_map *map)
9205 {
9206         return map->map_extra;
9207 }
9208
9209 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
9210 {
9211         if (map->fd >= 0)
9212                 return libbpf_err(-EBUSY);
9213         map->map_extra = map_extra;
9214         return 0;
9215 }
9216
9217 __u32 bpf_map__numa_node(const struct bpf_map *map)
9218 {
9219         return map->numa_node;
9220 }
9221
9222 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9223 {
9224         if (map->fd >= 0)
9225                 return libbpf_err(-EBUSY);
9226         map->numa_node = numa_node;
9227         return 0;
9228 }
9229
9230 __u32 bpf_map__key_size(const struct bpf_map *map)
9231 {
9232         return map->def.key_size;
9233 }
9234
9235 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9236 {
9237         if (map->fd >= 0)
9238                 return libbpf_err(-EBUSY);
9239         map->def.key_size = size;
9240         return 0;
9241 }
9242
9243 __u32 bpf_map__value_size(const struct bpf_map *map)
9244 {
9245         return map->def.value_size;
9246 }
9247
9248 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9249 {
9250         if (map->fd >= 0)
9251                 return libbpf_err(-EBUSY);
9252         map->def.value_size = size;
9253         return 0;
9254 }
9255
9256 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9257 {
9258         return map ? map->btf_key_type_id : 0;
9259 }
9260
9261 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9262 {
9263         return map ? map->btf_value_type_id : 0;
9264 }
9265
9266 int bpf_map__set_initial_value(struct bpf_map *map,
9267                                const void *data, size_t size)
9268 {
9269         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9270             size != map->def.value_size || map->fd >= 0)
9271                 return libbpf_err(-EINVAL);
9272
9273         memcpy(map->mmaped, data, size);
9274         return 0;
9275 }
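
/*
 * Editorial sketch: overriding a global-data map's contents before load.
 * This only works for mmap-ed internal maps (.data/.rodata/.bss), and the
 * buffer must be exactly value_size bytes. The struct layout below is
 * hypothetical and must mirror the BPF-side globals.
 */
#if 0
#include <errno.h>
#include <bpf/libbpf.h>

struct my_rodata { int debug_level; }; /* hypothetical BPF-side layout */

static int set_cfg(struct bpf_object *obj)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, ".rodata");
	struct my_rodata cfg = { .debug_level = 2 };

	if (!map)
		return -errno;
	return bpf_map__set_initial_value(map, &cfg, sizeof(cfg));
}
#endif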
9276
9277 const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
9278 {
9279         if (!map->mmaped)
9280                 return NULL;
9281         *psize = map->def.value_size;
9282         return map->mmaped;
9283 }
9284
9285 bool bpf_map__is_internal(const struct bpf_map *map)
9286 {
9287         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
9288 }
9289
9290 __u32 bpf_map__ifindex(const struct bpf_map *map)
9291 {
9292         return map->map_ifindex;
9293 }
9294
9295 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
9296 {
9297         if (map->fd >= 0)
9298                 return libbpf_err(-EBUSY);
9299         map->map_ifindex = ifindex;
9300         return 0;
9301 }
9302
9303 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
9304 {
9305         if (!bpf_map_type__is_map_in_map(map->def.type)) {
9306                 pr_warn("error: unsupported map type\n");
9307                 return libbpf_err(-EINVAL);
9308         }
9309         if (map->inner_map_fd != -1) {
9310                 pr_warn("error: inner_map_fd already specified\n");
9311                 return libbpf_err(-EINVAL);
9312         }
9313         if (map->inner_map) {
9314                 bpf_map__destroy(map->inner_map);
9315                 zfree(&map->inner_map);
9316         }
9317         map->inner_map_fd = fd;
9318         return 0;
9319 }
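
/*
 * Editorial sketch: supplying an inner-map template for a map-in-map
 * before the outer map is created. bpf_map_create() is libbpf's low-level
 * wrapper from bpf.h; map name and sizes are arbitrary. In this sketch the
 * caller keeps ownership of the template fd and closes it after load.
 */
#if 0
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int set_inner_template(struct bpf_map *outer)
{
	int inner_fd, err;

	inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "inner_tmpl",
				  sizeof(int), sizeof(long), 16, NULL);
	if (inner_fd < 0)
		return inner_fd;

	err = bpf_map__set_inner_map_fd(outer, inner_fd);
	if (err)
		close(inner_fd);
	return err;
}
#endif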
9320
9321 static struct bpf_map *
9322 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
9323 {
9324         ssize_t idx;
9325         struct bpf_map *s, *e;
9326
9327         if (!obj || !obj->maps)
9328                 return errno = EINVAL, NULL;
9329
9330         s = obj->maps;
9331         e = obj->maps + obj->nr_maps;
9332
9333         if ((m < s) || (m >= e)) {
9334                 pr_warn("error in %s: map handler doesn't belong to object\n",
9335                          __func__);
9336                 return errno = EINVAL, NULL;
9337         }
9338
9339         idx = (m - obj->maps) + i;
9340         if (idx >= obj->nr_maps || idx < 0)
9341                 return NULL;
9342         return &obj->maps[idx];
9343 }
9344
9345 struct bpf_map *
9346 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
9347 {
9348         if (prev == NULL)
9349                 return obj->maps;
9350
9351         return __bpf_map__iter(prev, obj, 1);
9352 }
9353
9354 struct bpf_map *
9355 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
9356 {
9357         if (next == NULL) {
9358                 if (!obj->nr_maps)
9359                         return NULL;
9360                 return obj->maps + obj->nr_maps - 1;
9361         }
9362
9363         return __bpf_map__iter(next, obj, -1);
9364 }
9365
9366 struct bpf_map *
9367 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
9368 {
9369         struct bpf_map *pos;
9370
9371         bpf_object__for_each_map(pos, obj) {
9372                 /* if it's a special internal map name (which always starts
9373                  * with dot) then check if that special name matches the
9374                  * real map name (ELF section name)
9375                  */
9376                 if (name[0] == '.') {
9377                         if (pos->real_name && strcmp(pos->real_name, name) == 0)
9378                                 return pos;
9379                         continue;
9380                 }
9381                 /* otherwise map name has to be an exact match */
9382                 if (map_uses_real_name(pos)) {
9383                         if (strcmp(pos->real_name, name) == 0)
9384                                 return pos;
9385                         continue;
9386                 }
9387                 if (strcmp(pos->name, name) == 0)
9388                         return pos;
9389         }
9390         return errno = ENOENT, NULL;
9391 }
9392
9393 int
9394 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
9395 {
9396         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
9397 }
9398
9399 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
9400                            size_t value_sz, bool check_value_sz)
9401 {
9402         if (map->fd <= 0)
9403                 return -ENOENT;
9404
9405         if (map->def.key_size != key_sz) {
9406                 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
9407                         map->name, key_sz, map->def.key_size);
9408                 return -EINVAL;
9409         }
9410
9411         if (!check_value_sz)
9412                 return 0;
9413
9414         switch (map->def.type) {
9415         case BPF_MAP_TYPE_PERCPU_ARRAY:
9416         case BPF_MAP_TYPE_PERCPU_HASH:
9417         case BPF_MAP_TYPE_LRU_PERCPU_HASH:
9418         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
9419                 int num_cpu = libbpf_num_possible_cpus();
9420                 size_t elem_sz = roundup(map->def.value_size, 8);
9421
9422                 if (value_sz != num_cpu * elem_sz) {
9423                         pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
9424                                 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
9425                         return -EINVAL;
9426                 }
9427                 break;
9428         }
9429         default:
9430                 if (map->def.value_size != value_sz) {
9431                         pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
9432                                 map->name, value_sz, map->def.value_size);
9433                         return -EINVAL;
9434                 }
9435                 break;
9436         }
9437         return 0;
9438 }
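
/*
 * Editorial sketch: the per-CPU sizing rule enforced above. For per-CPU
 * map types, the user-space value buffer must hold one 8-byte-aligned
 * element per possible CPU.
 */
#if 0
#include <stdlib.h>
#include <bpf/libbpf.h>

static void *alloc_percpu_value(const struct bpf_map *map)
{
	int ncpu = libbpf_num_possible_cpus();
	size_t elem_sz = (bpf_map__value_size(map) + 7) & ~(size_t)7; /* roundup(8) */

	return ncpu < 0 ? NULL : calloc(ncpu, elem_sz);
}
#endif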
9439
9440 int bpf_map__lookup_elem(const struct bpf_map *map,
9441                          const void *key, size_t key_sz,
9442                          void *value, size_t value_sz, __u64 flags)
9443 {
9444         int err;
9445
9446         err = validate_map_op(map, key_sz, value_sz, true);
9447         if (err)
9448                 return libbpf_err(err);
9449
9450         return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
9451 }
9452
9453 int bpf_map__update_elem(const struct bpf_map *map,
9454                          const void *key, size_t key_sz,
9455                          const void *value, size_t value_sz, __u64 flags)
9456 {
9457         int err;
9458
9459         err = validate_map_op(map, key_sz, value_sz, true);
9460         if (err)
9461                 return libbpf_err(err);
9462
9463         return bpf_map_update_elem(map->fd, key, value, flags);
9464 }
9465
9466 int bpf_map__delete_elem(const struct bpf_map *map,
9467                          const void *key, size_t key_sz, __u64 flags)
9468 {
9469         int err;
9470
9471         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9472         if (err)
9473                 return libbpf_err(err);
9474
9475         return bpf_map_delete_elem_flags(map->fd, key, flags);
9476 }
9477
9478 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
9479                                     const void *key, size_t key_sz,
9480                                     void *value, size_t value_sz, __u64 flags)
9481 {
9482         int err;
9483
9484         err = validate_map_op(map, key_sz, value_sz, true);
9485         if (err)
9486                 return libbpf_err(err);
9487
9488         return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
9489 }
9490
9491 int bpf_map__get_next_key(const struct bpf_map *map,
9492                           const void *cur_key, void *next_key, size_t key_sz)
9493 {
9494         int err;
9495
9496         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9497         if (err)
9498                 return libbpf_err(err);
9499
9500         return bpf_map_get_next_key(map->fd, cur_key, next_key);
9501 }
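
/*
 * Editorial sketch: iterating all entries of a loaded map with the typed
 * wrappers above. A NULL cur_key starts iteration from the first key;
 * 4-byte keys and values are assumed here.
 */
#if 0
#include <stdio.h>
#include <bpf/libbpf.h>

static void dump_map(const struct bpf_map *map)
{
	__u32 key, next_key, value;
	__u32 *cur = NULL;

	while (!bpf_map__get_next_key(map, cur, &next_key, sizeof(next_key))) {
		if (!bpf_map__lookup_elem(map, &next_key, sizeof(next_key),
					  &value, sizeof(value), 0))
			printf("%u -> %u\n", next_key, value);
		key = next_key;
		cur = &key;
	}
}
#endif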
9502
9503 long libbpf_get_error(const void *ptr)
9504 {
9505         if (!IS_ERR_OR_NULL(ptr))
9506                 return 0;
9507
9508         if (IS_ERR(ptr))
9509                 errno = -PTR_ERR(ptr);
9510
9511         /* If ptr == NULL, then errno should be already set by the failing
9512          * API, because libbpf never returns NULL on success and it now always
9513          * sets errno on error. So no extra errno handling for ptr == NULL
9514          * case.
9515          */
9516         return -errno;
9517 }
9518
9519 /* Replace link's underlying BPF program with the new one */
9520 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
9521 {
9522         int ret;
9523
9524         ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
9525         return libbpf_err_errno(ret);
9526 }
9527
9528 /* Release "ownership" of the underlying BPF resource (typically, a BPF
9529  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
9530  * disconnected link, when destroyed through a bpf_link__destroy() call,
9531  * won't attempt to detach/unregister that BPF resource. This is useful in
9532  * situations where, say, the attached BPF program has to outlive the
9533  * userspace program that attached it. Depending on the type of BPF program,
9534  * though, additional steps (like pinning the BPF program in BPF FS) might be
9535  * necessary to ensure that exit of the userspace program doesn't trigger
9536  * automatic detachment and cleanup inside the kernel.
9537  */
9538 void bpf_link__disconnect(struct bpf_link *link)
9539 {
9540         link->disconnected = true;
9541 }
9542
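/* Usage sketch (illustrative only, not part of libbpf): letting the
 * attachment outlive the loader by pinning the link and disconnecting it,
 * so that bpf_link__destroy() below only frees memory; the pin path is
 * hypothetical:
 *
 *    struct bpf_link *link = bpf_program__attach(prog);
 *
 *    if (link && !bpf_link__pin(link, "/sys/fs/bpf/my_link")) {
 *            bpf_link__disconnect(link);
 *            bpf_link__destroy(link);  // no detach, link stays pinned
 *    }
 */
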
9543 int bpf_link__destroy(struct bpf_link *link)
9544 {
9545         int err = 0;
9546
9547         if (IS_ERR_OR_NULL(link))
9548                 return 0;
9549
9550         if (!link->disconnected && link->detach)
9551                 err = link->detach(link);
9552         if (link->pin_path)
9553                 free(link->pin_path);
9554         if (link->dealloc)
9555                 link->dealloc(link);
9556         else
9557                 free(link);
9558
9559         return libbpf_err(err);
9560 }
9561
9562 int bpf_link__fd(const struct bpf_link *link)
9563 {
9564         return link->fd;
9565 }
9566
9567 const char *bpf_link__pin_path(const struct bpf_link *link)
9568 {
9569         return link->pin_path;
9570 }
9571
9572 static int bpf_link__detach_fd(struct bpf_link *link)
9573 {
9574         return libbpf_err_errno(close(link->fd));
9575 }
9576
9577 struct bpf_link *bpf_link__open(const char *path)
9578 {
9579         struct bpf_link *link;
9580         int fd;
9581
9582         fd = bpf_obj_get(path);
9583         if (fd < 0) {
9584                 fd = -errno;
9585                 pr_warn("failed to open link at %s: %d\n", path, fd);
9586                 return libbpf_err_ptr(fd);
9587         }
9588
9589         link = calloc(1, sizeof(*link));
9590         if (!link) {
9591                 close(fd);
9592                 return libbpf_err_ptr(-ENOMEM);
9593         }
9594         link->detach = &bpf_link__detach_fd;
9595         link->fd = fd;
9596
9597         link->pin_path = strdup(path);
9598         if (!link->pin_path) {
9599                 bpf_link__destroy(link);
9600                 return libbpf_err_ptr(-ENOMEM);
9601         }
9602
9603         return link;
9604 }
9605
9606 int bpf_link__detach(struct bpf_link *link)
9607 {
9608         return bpf_link_detach(link->fd) ? -errno : 0;
9609 }
9610
9611 int bpf_link__pin(struct bpf_link *link, const char *path)
9612 {
9613         int err;
9614
9615         if (link->pin_path)
9616                 return libbpf_err(-EBUSY);
9617         err = make_parent_dir(path);
9618         if (err)
9619                 return libbpf_err(err);
9620         err = check_path(path);
9621         if (err)
9622                 return libbpf_err(err);
9623
9624         link->pin_path = strdup(path);
9625         if (!link->pin_path)
9626                 return libbpf_err(-ENOMEM);
9627
9628         if (bpf_obj_pin(link->fd, link->pin_path)) {
9629                 err = -errno;
9630                 zfree(&link->pin_path);
9631                 return libbpf_err(err);
9632         }
9633
9634         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
9635         return 0;
9636 }
9637
9638 int bpf_link__unpin(struct bpf_link *link)
9639 {
9640         int err;
9641
9642         if (!link->pin_path)
9643                 return libbpf_err(-EINVAL);
9644
9645         err = unlink(link->pin_path);
9646         if (err != 0)
9647                 return -errno;
9648
9649         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
9650         zfree(&link->pin_path);
9651         return 0;
9652 }
9653
9654 struct bpf_link_perf {
9655         struct bpf_link link;
9656         int perf_event_fd;
9657         /* legacy kprobe/uprobe support: keep track of probe identifier and type */
9658         char *legacy_probe_name;
9659         bool legacy_is_kprobe;
9660         bool legacy_is_retprobe;
9661 };
9662
9663 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
9664 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
9665
9666 static int bpf_link_perf_detach(struct bpf_link *link)
9667 {
9668         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9669         int err = 0;
9670
9671         if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
9672                 err = -errno;
9673
9674         if (perf_link->perf_event_fd != link->fd)
9675                 close(perf_link->perf_event_fd);
9676         close(link->fd);
9677
9678         /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
9679         if (perf_link->legacy_probe_name) {
9680                 if (perf_link->legacy_is_kprobe) {
9681                         err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
9682                                                          perf_link->legacy_is_retprobe);
9683                 } else {
9684                         err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
9685                                                          perf_link->legacy_is_retprobe);
9686                 }
9687         }
9688
9689         return err;
9690 }
9691
9692 static void bpf_link_perf_dealloc(struct bpf_link *link)
9693 {
9694         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9695
9696         free(perf_link->legacy_probe_name);
9697         free(perf_link);
9698 }
9699
9700 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
9701                                                      const struct bpf_perf_event_opts *opts)
9702 {
9703         char errmsg[STRERR_BUFSIZE];
9704         struct bpf_link_perf *link;
9705         int prog_fd, link_fd = -1, err;
9706
9707         if (!OPTS_VALID(opts, bpf_perf_event_opts))
9708                 return libbpf_err_ptr(-EINVAL);
9709
9710         if (pfd < 0) {
9711                 pr_warn("prog '%s': invalid perf event FD %d\n",
9712                         prog->name, pfd);
9713                 return libbpf_err_ptr(-EINVAL);
9714         }
9715         prog_fd = bpf_program__fd(prog);
9716         if (prog_fd < 0) {
9717                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
9718                         prog->name);
9719                 return libbpf_err_ptr(-EINVAL);
9720         }
9721
9722         link = calloc(1, sizeof(*link));
9723         if (!link)
9724                 return libbpf_err_ptr(-ENOMEM);
9725         link->link.detach = &bpf_link_perf_detach;
9726         link->link.dealloc = &bpf_link_perf_dealloc;
9727         link->perf_event_fd = pfd;
9728
9729         if (kernel_supports(prog->obj, FEAT_PERF_LINK)) {
9730                 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
9731                         .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
9732
9733                 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
9734                 if (link_fd < 0) {
9735                         err = -errno;
9736                         pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
9737                                 prog->name, pfd,
9738                                 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9739                         goto err_out;
9740                 }
9741                 link->link.fd = link_fd;
9742         } else {
9743                 if (OPTS_GET(opts, bpf_cookie, 0)) {
9744                         pr_warn("prog '%s': user context value is not supported\n", prog->name);
9745                         err = -EOPNOTSUPP;
9746                         goto err_out;
9747                 }
9748
9749                 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
9750                         err = -errno;
9751                         pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
9752                                 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9753                         if (err == -EPROTO)
9754                                 pr_warn("prog '%s': try adding PERF_SAMPLE_CALLCHAIN to or removing exclude_callchain_[kernel|user] from pfd %d\n",
9755                                         prog->name, pfd);
9756                         goto err_out;
9757                 }
9758                 link->link.fd = pfd;
9759         }
9760         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9761                 err = -errno;
9762                 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
9763                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9764                 goto err_out;
9765         }
9766
9767         return &link->link;
9768 err_out:
9769         if (link_fd >= 0)
9770                 close(link_fd);
9771         free(link);
9772         return libbpf_err_ptr(err);
9773 }
9774
9775 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
9776 {
9777         return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
9778 }
9779
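/* Usage sketch (illustrative only, not part of libbpf): attaching a BPF
 * program to a CPU cycles sampling event on CPU 0; error handling elided:
 *
 *    struct perf_event_attr attr = {
 *            .type = PERF_TYPE_HARDWARE,
 *            .size = sizeof(attr),
 *            .config = PERF_COUNT_HW_CPU_CYCLES,
 *            .freq = 1,
 *            .sample_freq = 99,
 *    };
 *    // args: attr, pid = -1 (any), cpu = 0, group_fd = -1, flags
 *    int pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1,
 *                      PERF_FLAG_FD_CLOEXEC);
 *    struct bpf_link *link = bpf_program__attach_perf_event(prog, pfd);
 */
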
9780 /*
9781  * This function is expected to parse an integer in the range of [0, 2^31-1]
9782  * from the given file using scanf format string fmt. If the actual parsed
9783  * value is negative, the result might be indistinguishable from an error
9784  */
9785 static int parse_uint_from_file(const char *file, const char *fmt)
9786 {
9787         char buf[STRERR_BUFSIZE];
9788         int err, ret;
9789         FILE *f;
9790
9791         f = fopen(file, "r");
9792         if (!f) {
9793                 err = -errno;
9794                 pr_debug("failed to open '%s': %s\n", file,
9795                          libbpf_strerror_r(err, buf, sizeof(buf)));
9796                 return err;
9797         }
9798         err = fscanf(f, fmt, &ret);
9799         if (err != 1) {
9800                 err = err == EOF ? -EIO : -errno;
9801                 pr_debug("failed to parse '%s': %s\n", file,
9802                         libbpf_strerror_r(err, buf, sizeof(buf)));
9803                 fclose(f);
9804                 return err;
9805         }
9806         fclose(f);
9807         return ret;
9808 }
9809
9810 static int determine_kprobe_perf_type(void)
9811 {
9812         const char *file = "/sys/bus/event_source/devices/kprobe/type";
9813
9814         return parse_uint_from_file(file, "%d\n");
9815 }
9816
9817 static int determine_uprobe_perf_type(void)
9818 {
9819         const char *file = "/sys/bus/event_source/devices/uprobe/type";
9820
9821         return parse_uint_from_file(file, "%d\n");
9822 }
9823
9824 static int determine_kprobe_retprobe_bit(void)
9825 {
9826         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
9827
9828         return parse_uint_from_file(file, "config:%d\n");
9829 }
9830
9831 static int determine_uprobe_retprobe_bit(void)
9832 {
9833         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
9834
9835         return parse_uint_from_file(file, "config:%d\n");
9836 }
9837
9838 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
9839 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
9840
9841 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
9842                                  uint64_t offset, int pid, size_t ref_ctr_off)
9843 {
9844         const size_t attr_sz = sizeof(struct perf_event_attr);
9845         struct perf_event_attr attr;
9846         char errmsg[STRERR_BUFSIZE];
9847         int type, pfd;
9848
9849         if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
9850                 return -EINVAL;
9851
9852         memset(&attr, 0, attr_sz);
9853
9854         type = uprobe ? determine_uprobe_perf_type()
9855                       : determine_kprobe_perf_type();
9856         if (type < 0) {
9857                 pr_warn("failed to determine %s perf type: %s\n",
9858                         uprobe ? "uprobe" : "kprobe",
9859                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
9860                 return type;
9861         }
9862         if (retprobe) {
9863                 int bit = uprobe ? determine_uprobe_retprobe_bit()
9864                                  : determine_kprobe_retprobe_bit();
9865
9866                 if (bit < 0) {
9867                         pr_warn("failed to determine %s retprobe bit: %s\n",
9868                                 uprobe ? "uprobe" : "kprobe",
9869                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
9870                         return bit;
9871                 }
9872                 attr.config |= 1 << bit;
9873         }
9874         attr.size = attr_sz;
9875         attr.type = type;
9876         attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
9877         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
9878         attr.config2 = offset;           /* kprobe_addr or probe_offset */
9879
9880         /* pid filter is meaningful only for uprobes */
9881         pfd = syscall(__NR_perf_event_open, &attr,
9882                       pid < 0 ? -1 : pid /* pid */,
9883                       pid == -1 ? 0 : -1 /* cpu */,
9884                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
9885         return pfd >= 0 ? pfd : -errno;
9886 }
9887
9888 static int append_to_file(const char *file, const char *fmt, ...)
9889 {
9890         int fd, n, err = 0;
9891         va_list ap;
9892
9893         fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
9894         if (fd < 0)
9895                 return -errno;
9896
9897         va_start(ap, fmt);
9898         n = vdprintf(fd, fmt, ap);
9899         va_end(ap);
9900
9901         if (n < 0)
9902                 err = -errno;
9903
9904         close(fd);
9905         return err;
9906 }
9907
9908 #define DEBUGFS "/sys/kernel/debug/tracing"
9909 #define TRACEFS "/sys/kernel/tracing"
9910
9911 static bool use_debugfs(void)
9912 {
9913         static int has_debugfs = -1;
9914
9915         if (has_debugfs < 0)
9916                 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
9917
9918         return has_debugfs == 1;
9919 }
9920
9921 static const char *tracefs_path(void)
9922 {
9923         return use_debugfs() ? DEBUGFS : TRACEFS;
9924 }
9925
9926 static const char *tracefs_kprobe_events(void)
9927 {
9928         return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
9929 }
9930
9931 static const char *tracefs_uprobe_events(void)
9932 {
9933         return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
9934 }
9935
9936 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
9937                                          const char *kfunc_name, size_t offset)
9938 {
9939         static int index = 0;
9940
9941         snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
9942                  __sync_fetch_and_add(&index, 1));
9943 }
9944
9945 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
9946                                    const char *kfunc_name, size_t offset)
9947 {
9948         return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
9949                               retprobe ? 'r' : 'p',
9950                               retprobe ? "kretprobes" : "kprobes",
9951                               probe_name, kfunc_name, offset);
9952 }
9953
9954 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
9955 {
9956         return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
9957                               retprobe ? "kretprobes" : "kprobes", probe_name);
9958 }
9959
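/* For reference, the text the two helpers above append to
 * [debug|trace]fs/kprobe_events looks like this (PID and index values are
 * just examples):
 *
 *    p:kprobes/libbpf_1234_do_sys_open_0x0_0 do_sys_open+0x0
 *    r:kretprobes/libbpf_1234_do_sys_open_0x0_1 do_sys_open+0x0
 *    -:kprobes/libbpf_1234_do_sys_open_0x0_0
 *
 * See Documentation/trace/kprobetrace.rst for the full format.
 */
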
9960 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
9961 {
9962         char file[256];
9963
9964         snprintf(file, sizeof(file), "%s/events/%s/%s/id",
9965                  tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
9966
9967         return parse_uint_from_file(file, "%d\n");
9968 }
9969
9970 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
9971                                          const char *kfunc_name, size_t offset, int pid)
9972 {
9973         const size_t attr_sz = sizeof(struct perf_event_attr);
9974         struct perf_event_attr attr;
9975         char errmsg[STRERR_BUFSIZE];
9976         int type, pfd, err;
9977
9978         err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
9979         if (err < 0) {
9980                 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
9981                         kfunc_name, offset,
9982                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9983                 return err;
9984         }
9985         type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
9986         if (type < 0) {
9987                 err = type;
9988                 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
9989                         kfunc_name, offset,
9990                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9991                 goto err_clean_legacy;
9992         }
9993
9994         memset(&attr, 0, attr_sz);
9995         attr.size = attr_sz;
9996         attr.config = type;
9997         attr.type = PERF_TYPE_TRACEPOINT;
9998
9999         pfd = syscall(__NR_perf_event_open, &attr,
10000                       pid < 0 ? -1 : pid, /* pid */
10001                       pid == -1 ? 0 : -1, /* cpu */
10002                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10003         if (pfd < 0) {
10004                 err = -errno;
10005                 pr_warn("legacy kprobe perf_event_open() failed: %s\n",
10006                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10007                 goto err_clean_legacy;
10008         }
10009         return pfd;
10010
10011 err_clean_legacy:
10012         /* Clear the newly added legacy kprobe_event */
10013         remove_kprobe_event_legacy(probe_name, retprobe);
10014         return err;
10015 }
10016
10017 static const char *arch_specific_syscall_pfx(void)
10018 {
10019 #if defined(__x86_64__)
10020         return "x64";
10021 #elif defined(__i386__)
10022         return "ia32";
10023 #elif defined(__s390x__)
10024         return "s390x";
10025 #elif defined(__s390__)
10026         return "s390";
10027 #elif defined(__arm__)
10028         return "arm";
10029 #elif defined(__aarch64__)
10030         return "arm64";
10031 #elif defined(__mips__)
10032         return "mips";
10033 #elif defined(__riscv)
10034         return "riscv";
10035 #elif defined(__powerpc64__)
10036         return "powerpc64";
10037 #elif defined(__powerpc__)
10038         return "powerpc";
10039 #else
10040         return NULL;
10041 #endif
10042 }
10043
10044 static int probe_kern_syscall_wrapper(void)
10045 {
10046         char syscall_name[64];
10047         const char *ksys_pfx;
10048
10049         ksys_pfx = arch_specific_syscall_pfx();
10050         if (!ksys_pfx)
10051                 return 0;
10052
10053         snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
10054
10055         if (determine_kprobe_perf_type() >= 0) {
10056                 int pfd;
10057
10058                 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
10059                 if (pfd >= 0)
10060                         close(pfd);
10061
10062                 return pfd >= 0 ? 1 : 0;
10063         } else { /* legacy mode */
10064                 char probe_name[128];
10065
10066                 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
10067                 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
10068                         return 0;
10069
10070                 (void)remove_kprobe_event_legacy(probe_name, false);
10071                 return 1;
10072         }
10073 }
10074
10075 struct bpf_link *
10076 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
10077                                 const char *func_name,
10078                                 const struct bpf_kprobe_opts *opts)
10079 {
10080         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10081         char errmsg[STRERR_BUFSIZE];
10082         char *legacy_probe = NULL;
10083         struct bpf_link *link;
10084         size_t offset;
10085         bool retprobe, legacy;
10086         int pfd, err;
10087
10088         if (!OPTS_VALID(opts, bpf_kprobe_opts))
10089                 return libbpf_err_ptr(-EINVAL);
10090
10091         retprobe = OPTS_GET(opts, retprobe, false);
10092         offset = OPTS_GET(opts, offset, 0);
10093         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10094
10095         legacy = determine_kprobe_perf_type() < 0;
10096         if (!legacy) {
10097                 pfd = perf_event_open_probe(false /* uprobe */, retprobe,
10098                                             func_name, offset,
10099                                             -1 /* pid */, 0 /* ref_ctr_off */);
10100         } else {
10101                 char probe_name[256];
10102
10103                 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
10104                                              func_name, offset);
10105
10106                 legacy_probe = strdup(probe_name);
10107                 if (!legacy_probe)
10108                         return libbpf_err_ptr(-ENOMEM);
10109
10110                 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
10111                                                     offset, -1 /* pid */);
10112         }
10113         if (pfd < 0) {
10114                 err = -errno;
10115                 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
10116                         prog->name, retprobe ? "kretprobe" : "kprobe",
10117                         func_name, offset,
10118                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10119                 goto err_out;
10120         }
10121         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10122         err = libbpf_get_error(link);
10123         if (err) {
10124                 close(pfd);
10125                 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
10126                         prog->name, retprobe ? "kretprobe" : "kprobe",
10127                         func_name, offset,
10128                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10129                 goto err_clean_legacy;
10130         }
10131         if (legacy) {
10132                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10133
10134                 perf_link->legacy_probe_name = legacy_probe;
10135                 perf_link->legacy_is_kprobe = true;
10136                 perf_link->legacy_is_retprobe = retprobe;
10137         }
10138
10139         return link;
10140
10141 err_clean_legacy:
10142         if (legacy)
10143                 remove_kprobe_event_legacy(legacy_probe, retprobe);
10144 err_out:
10145         free(legacy_probe);
10146         return libbpf_err_ptr(err);
10147 }
10148
10149 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
10150                                             bool retprobe,
10151                                             const char *func_name)
10152 {
10153         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
10154                 .retprobe = retprobe,
10155         );
10156
10157         return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10158 }
10159
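/* Usage sketch (illustrative only, not part of libbpf): attaching at an
 * offset within a kernel function and passing a BPF cookie; 'prog' is a
 * hypothetical loaded struct bpf_program *:
 *
 *    LIBBPF_OPTS(bpf_kprobe_opts, opts,
 *            .offset = 0x10,
 *            .bpf_cookie = 42,
 *    );
 *    struct bpf_link *link;
 *
 *    link = bpf_program__attach_kprobe_opts(prog, "do_sys_openat2", &opts);
 */
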
10160 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
10161                                               const char *syscall_name,
10162                                               const struct bpf_ksyscall_opts *opts)
10163 {
10164         LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
10165         char func_name[128];
10166
10167         if (!OPTS_VALID(opts, bpf_ksyscall_opts))
10168                 return libbpf_err_ptr(-EINVAL);
10169
10170         if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
10171                 /* arch_specific_syscall_pfx() should never return NULL here
10172                  * because it is guarded by kernel_supports(). However, since
10173                  * the compiler does not know that, we keep an explicit
10174                  * fallback to "" as well.
10175                  */
10176                 snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
10177                          arch_specific_syscall_pfx() ? : "", syscall_name);
10178         } else {
10179                 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
10180         }
10181
10182         kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
10183         kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10184
10185         return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
10186 }
10187
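/* Usage sketch (illustrative only, not part of libbpf): manual attachment
 * of a SEC("ksyscall/...") program; with NULL opts this resolves to
 * __<arch>_sys_close (or __se_sys_close on kernels without syscall
 * wrappers) and attaches a kprobe there:
 *
 *    struct bpf_link *link;
 *
 *    link = bpf_program__attach_ksyscall(prog, "close", NULL);
 */
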
10188 /* Adapted from perf/util/string.c */
10189 static bool glob_match(const char *str, const char *pat)
10190 {
10191         while (*str && *pat && *pat != '*') {
10192                 if (*pat == '?') {      /* Matches any single character */
10193                         str++;
10194                         pat++;
10195                         continue;
10196                 }
10197                 if (*str != *pat)
10198                         return false;
10199                 str++;
10200                 pat++;
10201         }
10202         /* Check wild card */
10203         if (*pat == '*') {
10204                 while (*pat == '*')
10205                         pat++;
10206                 if (!*pat) /* Tail wild card matches all */
10207                         return true;
10208                 while (*str)
10209                         if (glob_match(str++, pat))
10210                                 return true;
10211         }
10212         return !*str && !*pat;
10213 }
10214
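/* A few examples of what glob_match() accepts ('*' matches any sequence,
 * '?' matches any single character):
 *
 *    glob_match("tcp_v4_connect", "tcp_*")    -> true
 *    glob_match("tcp_v4_connect", "tcp_v?_*") -> true
 *    glob_match("udp_sendmsg", "tcp_*")       -> false
 */
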
10215 struct kprobe_multi_resolve {
10216         const char *pattern;
10217         unsigned long *addrs;
10218         size_t cap;
10219         size_t cnt;
10220 };
10221
10222 static int
10223 resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
10224                         const char *sym_name, void *ctx)
10225 {
10226         struct kprobe_multi_resolve *res = ctx;
10227         int err;
10228
10229         if (!glob_match(sym_name, res->pattern))
10230                 return 0;
10231
10232         err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
10233                                 res->cnt + 1);
10234         if (err)
10235                 return err;
10236
10237         res->addrs[res->cnt++] = (unsigned long) sym_addr;
10238         return 0;
10239 }
10240
10241 struct bpf_link *
10242 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
10243                                       const char *pattern,
10244                                       const struct bpf_kprobe_multi_opts *opts)
10245 {
10246         LIBBPF_OPTS(bpf_link_create_opts, lopts);
10247         struct kprobe_multi_resolve res = {
10248                 .pattern = pattern,
10249         };
10250         struct bpf_link *link = NULL;
10251         char errmsg[STRERR_BUFSIZE];
10252         const unsigned long *addrs;
10253         int err, link_fd, prog_fd;
10254         const __u64 *cookies;
10255         const char **syms;
10256         bool retprobe;
10257         size_t cnt;
10258
10259         if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
10260                 return libbpf_err_ptr(-EINVAL);
10261
10262         syms    = OPTS_GET(opts, syms, false);
10263         addrs   = OPTS_GET(opts, addrs, false);
10264         cnt     = OPTS_GET(opts, cnt, false);
10265         cookies = OPTS_GET(opts, cookies, false);
10266
10267         if (!pattern && !addrs && !syms)
10268                 return libbpf_err_ptr(-EINVAL);
10269         if (pattern && (addrs || syms || cookies || cnt))
10270                 return libbpf_err_ptr(-EINVAL);
10271         if (!pattern && !cnt)
10272                 return libbpf_err_ptr(-EINVAL);
10273         if (addrs && syms)
10274                 return libbpf_err_ptr(-EINVAL);
10275
10276         if (pattern) {
10277                 err = libbpf_kallsyms_parse(resolve_kprobe_multi_cb, &res);
10278                 if (err)
10279                         goto error;
10280                 if (!res.cnt) {
10281                         err = -ENOENT;
10282                         goto error;
10283                 }
10284                 addrs = res.addrs;
10285                 cnt = res.cnt;
10286         }
10287
10288         retprobe = OPTS_GET(opts, retprobe, false);
10289
10290         lopts.kprobe_multi.syms = syms;
10291         lopts.kprobe_multi.addrs = addrs;
10292         lopts.kprobe_multi.cookies = cookies;
10293         lopts.kprobe_multi.cnt = cnt;
10294         lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
10295
10296         link = calloc(1, sizeof(*link));
10297         if (!link) {
10298                 err = -ENOMEM;
10299                 goto error;
10300         }
10301         link->detach = &bpf_link__detach_fd;
10302
10303         prog_fd = bpf_program__fd(prog);
10304         link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
10305         if (link_fd < 0) {
10306                 err = -errno;
10307                 pr_warn("prog '%s': failed to attach: %s\n",
10308                         prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10309                 goto error;
10310         }
10311         link->fd = link_fd;
10312         free(res.addrs);
10313         return link;
10314
10315 error:
10316         free(link);
10317         free(res.addrs);
10318         return libbpf_err_ptr(err);
10319 }
10320
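/* Usage sketch (illustrative only, not part of libbpf): multi-kprobe
 * attachment either by glob pattern or by explicit symbol list (the two
 * modes are mutually exclusive, see the checks above):
 *
 *    const char *syms[] = { "tcp_v4_connect", "tcp_v6_connect" };
 *    LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
 *            .syms = syms,
 *            .cnt = ARRAY_SIZE(syms),
 *    );
 *    struct bpf_link *link;
 *
 *    link = bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", NULL);
 *    // or:
 *    link = bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);
 */
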
10321 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10322 {
10323         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
10324         unsigned long offset = 0;
10325         const char *func_name;
10326         char *func;
10327         int n;
10328
10329         *link = NULL;
10330
10331         /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
10332         if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
10333                 return 0;
10334
10335         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
10336         if (opts.retprobe)
10337                 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
10338         else
10339                 func_name = prog->sec_name + sizeof("kprobe/") - 1;
10340
10341         n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
10342         if (n < 1) {
10343                 pr_warn("kprobe name is invalid: %s\n", func_name);
10344                 return -EINVAL;
10345         }
10346         if (opts.retprobe && offset != 0) {
10347                 free(func);
10348                 pr_warn("kretprobes do not support offset specification\n");
10349                 return -EINVAL;
10350         }
10351
10352         opts.offset = offset;
10353         *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
10354         free(func);
10355         return libbpf_get_error(*link);
10356 }
10357
10358 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10359 {
10360         LIBBPF_OPTS(bpf_ksyscall_opts, opts);
10361         const char *syscall_name;
10362
10363         *link = NULL;
10364
10365         /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
10366         if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
10367                 return 0;
10368
10369         opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
10370         if (opts.retprobe)
10371                 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
10372         else
10373                 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
10374
10375         *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
10376         return *link ? 0 : -errno;
10377 }
10378
10379 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10380 {
10381         LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
10382         const char *spec;
10383         char *pattern;
10384         int n;
10385
10386         *link = NULL;
10387
10388         /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
10389         if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
10390             strcmp(prog->sec_name, "kretprobe.multi") == 0)
10391                 return 0;
10392
10393         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
10394         if (opts.retprobe)
10395                 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
10396         else
10397                 spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
10398
10399         n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
10400         if (n < 1) {
10401                 pr_warn("kprobe multi pattern is invalid: %s\n", spec);
10402                 return -EINVAL;
10403         }
10404
10405         *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
10406         free(pattern);
10407         return libbpf_get_error(*link);
10408 }
10409
10410 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
10411                                          const char *binary_path, uint64_t offset)
10412 {
10413         int i;
10414
10415         snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
10416
10417         /* sanitize binary_path in the probe name */
10418         for (i = 0; buf[i]; i++) {
10419                 if (!isalnum(buf[i]))
10420                         buf[i] = '_';
10421         }
10422 }
10423
10424 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
10425                                           const char *binary_path, size_t offset)
10426 {
10427         return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
10428                               retprobe ? 'r' : 'p',
10429                               retprobe ? "uretprobes" : "uprobes",
10430                               probe_name, binary_path, offset);
10431 }
10432
10433 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
10434 {
10435         return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
10436                               retprobe ? "uretprobes" : "uprobes", probe_name);
10437 }
10438
10439 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
10440 {
10441         char file[512];
10442
10443         snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10444                  tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
10445
10446         return parse_uint_from_file(file, "%d\n");
10447 }
10448
10449 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
10450                                          const char *binary_path, size_t offset, int pid)
10451 {
10452         const size_t attr_sz = sizeof(struct perf_event_attr);
10453         struct perf_event_attr attr;
10454         int type, pfd, err;
10455
10456         err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
10457         if (err < 0) {
10458                 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
10459                         binary_path, (size_t)offset, err);
10460                 return err;
10461         }
10462         type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
10463         if (type < 0) {
10464                 err = type;
10465                 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
10466                         binary_path, offset, err);
10467                 goto err_clean_legacy;
10468         }
10469
10470         memset(&attr, 0, attr_sz);
10471         attr.size = attr_sz;
10472         attr.config = type;
10473         attr.type = PERF_TYPE_TRACEPOINT;
10474
10475         pfd = syscall(__NR_perf_event_open, &attr,
10476                       pid < 0 ? -1 : pid, /* pid */
10477                       pid == -1 ? 0 : -1, /* cpu */
10478                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10479         if (pfd < 0) {
10480                 err = -errno;
10481                 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
10482                 goto err_clean_legacy;
10483         }
10484         return pfd;
10485
10486 err_clean_legacy:
10487         /* Clear the newly added legacy uprobe_event */
10488         remove_uprobe_event_legacy(probe_name, retprobe);
10489         return err;
10490 }
10491
10492 /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
10493 static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
10494 {
10495         while ((scn = elf_nextscn(elf, scn)) != NULL) {
10496                 GElf_Shdr sh;
10497
10498                 if (!gelf_getshdr(scn, &sh))
10499                         continue;
10500                 if (sh.sh_type == sh_type)
10501                         return scn;
10502         }
10503         return NULL;
10504 }
10505
10506 /* Find offset of function name in object specified by path.  "name" matches
10507  * symbol name or name@@LIB for library functions.
10508  */
10509 static long elf_find_func_offset(const char *binary_path, const char *name)
10510 {
10511         int fd, i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
10512         bool is_shared_lib, is_name_qualified;
10513         char errmsg[STRERR_BUFSIZE];
10514         long ret = -ENOENT;
10515         size_t name_len;
10516         GElf_Ehdr ehdr;
10517         Elf *elf;
10518
10519         fd = open(binary_path, O_RDONLY | O_CLOEXEC);
10520         if (fd < 0) {
10521                 ret = -errno;
10522                 pr_warn("failed to open %s: %s\n", binary_path,
10523                         libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
10524                 return ret;
10525         }
10526         elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
10527         if (!elf) {
10528                 pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
10529                 close(fd);
10530                 return -LIBBPF_ERRNO__FORMAT;
10531         }
10532         if (!gelf_getehdr(elf, &ehdr)) {
10533                 pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
10534                 ret = -LIBBPF_ERRNO__FORMAT;
10535                 goto out;
10536         }
10537         /* for shared lib case, we do not need to calculate relative offset */
10538         is_shared_lib = ehdr.e_type == ET_DYN;
10539
10540         name_len = strlen(name);
10541         /* Does name specify "@@LIB"? */
10542         is_name_qualified = strstr(name, "@@") != NULL;
10543
10544         /* Search SHT_DYNSYM, SHT_SYMTAB for symbol.  This search order is used because if
10545          * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
10546          * linked binary may not have SHT_DYNSYM, so absence of a section should not be
10547          * reported as a warning/error.
10548          */
10549         for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
10550                 size_t nr_syms, strtabidx, idx;
10551                 Elf_Data *symbols = NULL;
10552                 Elf_Scn *scn = NULL;
10553                 int last_bind = -1;
10554                 const char *sname;
10555                 GElf_Shdr sh;
10556
10557                 scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
10558                 if (!scn) {
10559                         pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
10560                                  binary_path);
10561                         continue;
10562                 }
10563                 if (!gelf_getshdr(scn, &sh))
10564                         continue;
10565                 strtabidx = sh.sh_link;
10566                 symbols = elf_getdata(scn, 0);
10567                 if (!symbols) {
10568                         pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
10569                                 binary_path, elf_errmsg(-1));
10570                         ret = -LIBBPF_ERRNO__FORMAT;
10571                         goto out;
10572                 }
10573                 nr_syms = symbols->d_size / sh.sh_entsize;
10574
10575                 for (idx = 0; idx < nr_syms; idx++) {
10576                         int curr_bind;
10577                         GElf_Sym sym;
10578                         Elf_Scn *sym_scn;
10579                         GElf_Shdr sym_sh;
10580
10581                         if (!gelf_getsym(symbols, idx, &sym))
10582                                 continue;
10583
10584                         if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
10585                                 continue;
10586
10587                         sname = elf_strptr(elf, strtabidx, sym.st_name);
10588                         if (!sname)
10589                                 continue;
10590
10591                         curr_bind = GELF_ST_BIND(sym.st_info);
10592
10593                         /* User can specify func, func@@LIB or func@@LIB_VERSION. */
10594                         if (strncmp(sname, name, name_len) != 0)
10595                                 continue;
10596                         /* ...but we don't want a search for "foo" to match "foo2" also, so any
10597                          * additional characters in sname should be of the form "@@LIB".
10598                          */
10599                         if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
10600                                 continue;
10601
10602                         if (ret >= 0) {
10603                                 /* handle multiple matches */
10604                                 if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
10605                                         /* Only accept one non-weak bind. */
10606                                         pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
10607                                                 sname, name, binary_path);
10608                                         ret = -LIBBPF_ERRNO__FORMAT;
10609                                         goto out;
10610                                 } else if (curr_bind == STB_WEAK) {
10611                                         /* already have a non-weak bind, and
10612                                          * this is a weak bind, so ignore.
10613                                          */
10614                                         continue;
10615                                 }
10616                         }
10617
10618                         /* Transform symbol's virtual address (absolute for
10619                          * binaries and relative for shared libs) into file
10620                          * offset, which is what kernel is expecting for
10621                          * uprobe/uretprobe attachment.
10622                          * See Documentation/trace/uprobetracer.rst for more
10623                          * details.
10624                          * This is done by looking up symbol's containing
10625                          * section's header and using its virtual address
10626                          * (sh_addr) and corresponding file offset (sh_offset)
10627                          * to transform sym.st_value (virtual address) into
10628                          * desired final file offset.
10629                          */
10630                         sym_scn = elf_getscn(elf, sym.st_shndx);
10631                         if (!sym_scn)
10632                                 continue;
10633                         if (!gelf_getshdr(sym_scn, &sym_sh))
10634                                 continue;
10635
10636                         ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
10637                         last_bind = curr_bind;
10638                 }
10639                 if (ret > 0)
10640                         break;
10641         }
10642
10643         if (ret > 0) {
10644                 pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
10645                          ret);
10646         } else {
10647                 if (ret == 0) {
10648                         pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
10649                                 is_shared_lib ? "should not be 0 in a shared library" :
10650                                                 "try using shared library path instead");
10651                         ret = -ENOENT;
10652                 } else {
10653                         pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
10654                 }
10655         }
10656 out:
10657         elf_end(elf);
10658         close(fd);
10659         return ret;
10660 }
10661
10662 static const char *arch_specific_lib_paths(void)
10663 {
10664         /*
10665          * Based on https://packages.debian.org/sid/libc6.
10666          *
10667          * Assume that the traced program is built for the same architecture
10668          * as libbpf, which should cover the vast majority of cases.
10669          */
10670 #if defined(__x86_64__)
10671         return "/lib/x86_64-linux-gnu";
10672 #elif defined(__i386__)
10673         return "/lib/i386-linux-gnu";
10674 #elif defined(__s390x__)
10675         return "/lib/s390x-linux-gnu";
10676 #elif defined(__s390__)
10677         return "/lib/s390-linux-gnu";
10678 #elif defined(__arm__) && defined(__SOFTFP__)
10679         return "/lib/arm-linux-gnueabi";
10680 #elif defined(__arm__) && !defined(__SOFTFP__)
10681         return "/lib/arm-linux-gnueabihf";
10682 #elif defined(__aarch64__)
10683         return "/lib/aarch64-linux-gnu";
10684 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
10685         return "/lib/mips64el-linux-gnuabi64";
10686 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
10687         return "/lib/mipsel-linux-gnu";
10688 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
10689         return "/lib/powerpc64le-linux-gnu";
10690 #elif defined(__sparc__) && defined(__arch64__)
10691         return "/lib/sparc64-linux-gnu";
10692 #elif defined(__riscv) && __riscv_xlen == 64
10693         return "/lib/riscv64-linux-gnu";
10694 #else
10695         return NULL;
10696 #endif
10697 }
10698
10699 /* Get full path to program/shared library. */
10700 static int resolve_full_path(const char *file, char *result, size_t result_sz)
10701 {
10702         const char *search_paths[3] = {};
10703         int i, perm;
10704
10705         if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
10706                 search_paths[0] = getenv("LD_LIBRARY_PATH");
10707                 search_paths[1] = "/usr/lib64:/usr/lib";
10708                 search_paths[2] = arch_specific_lib_paths();
10709                 perm = R_OK;
10710         } else {
10711                 search_paths[0] = getenv("PATH");
10712                 search_paths[1] = "/usr/bin:/usr/sbin";
10713                 perm = R_OK | X_OK;
10714         }
10715
10716         for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
10717                 const char *s;
10718
10719                 if (!search_paths[i])
10720                         continue;
10721                 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
10722                         char *next_path;
10723                         int seg_len;
10724
10725                         if (s[0] == ':')
10726                                 s++;
10727                         next_path = strchr(s, ':');
10728                         seg_len = next_path ? next_path - s : strlen(s);
10729                         if (!seg_len)
10730                                 continue;
10731                         snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
10732                         /* ensure it has required permissions */
10733                         if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
10734                                 continue;
10735                         pr_debug("resolved '%s' to '%s'\n", file, result);
10736                         return 0;
10737                 }
10738         }
10739         return -ENOENT;
10740 }
10741
10742 LIBBPF_API struct bpf_link *
10743 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
10744                                 const char *binary_path, size_t func_offset,
10745                                 const struct bpf_uprobe_opts *opts)
10746 {
10747         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10748         char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
10749         char full_binary_path[PATH_MAX];
10750         struct bpf_link *link;
10751         size_t ref_ctr_off;
10752         int pfd, err;
10753         bool retprobe, legacy;
10754         const char *func_name;
10755
10756         if (!OPTS_VALID(opts, bpf_uprobe_opts))
10757                 return libbpf_err_ptr(-EINVAL);
10758
10759         retprobe = OPTS_GET(opts, retprobe, false);
10760         ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
10761         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10762
10763         if (!binary_path)
10764                 return libbpf_err_ptr(-EINVAL);
10765
10766         if (!strchr(binary_path, '/')) {
10767                 err = resolve_full_path(binary_path, full_binary_path,
10768                                         sizeof(full_binary_path));
10769                 if (err) {
10770                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
10771                                 prog->name, binary_path, err);
10772                         return libbpf_err_ptr(err);
10773                 }
10774                 binary_path = full_binary_path;
10775         }
10776         func_name = OPTS_GET(opts, func_name, NULL);
10777         if (func_name) {
10778                 long sym_off;
10779
10780                 sym_off = elf_find_func_offset(binary_path, func_name);
10781                 if (sym_off < 0)
10782                         return libbpf_err_ptr(sym_off);
10783                 func_offset += sym_off;
10784         }
10785
10786         legacy = determine_uprobe_perf_type() < 0;
10787         if (!legacy) {
10788                 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
10789                                             func_offset, pid, ref_ctr_off);
10790         } else {
10791                 char probe_name[PATH_MAX + 64];
10792
10793                 if (ref_ctr_off)
10794                         return libbpf_err_ptr(-EINVAL);
10795
10796                 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
10797                                              binary_path, func_offset);
10798
10799                 legacy_probe = strdup(probe_name);
10800                 if (!legacy_probe)
10801                         return libbpf_err_ptr(-ENOMEM);
10802
10803                 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
10804                                                     binary_path, func_offset, pid);
10805         }
10806         if (pfd < 0) {
10807                 err = -errno;
10808                 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
10809                         prog->name, retprobe ? "uretprobe" : "uprobe",
10810                         binary_path, func_offset,
10811                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10812                 goto err_out;
10813         }
10814
10815         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10816         err = libbpf_get_error(link);
10817         if (err) {
10818                 close(pfd);
10819                 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
10820                         prog->name, retprobe ? "uretprobe" : "uprobe",
10821                         binary_path, func_offset,
10822                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10823                 goto err_clean_legacy;
10824         }
10825         if (legacy) {
10826                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10827
10828                 perf_link->legacy_probe_name = legacy_probe;
10829                 perf_link->legacy_is_kprobe = false;
10830                 perf_link->legacy_is_retprobe = retprobe;
10831         }
10832         return link;
10833
10834 err_clean_legacy:
10835         if (legacy)
10836                 remove_uprobe_event_legacy(legacy_probe, retprobe);
10837 err_out:
10838         free(legacy_probe);
10839         return libbpf_err_ptr(err);
10840 }
10841
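/* Usage sketch (illustrative only, not part of libbpf): attaching to a
 * libc function by name in all processes; "libc.so.6" has no '/' and is
 * therefore resolved via resolve_full_path():
 *
 *    LIBBPF_OPTS(bpf_uprobe_opts, opts, .func_name = "malloc");
 *    struct bpf_link *link;
 *
 *    // pid -1 == any process, func_offset 0 == use func_name as-is
 *    link = bpf_program__attach_uprobe_opts(prog, -1, "libc.so.6", 0, &opts);
 */
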
10842 /* Format of u[ret]probe section definition supporting auto-attach:
10843  * u[ret]probe/binary:function[+offset]
10844  *
10845  * binary can be an absolute/relative path or a filename; the latter is resolved to a
10846  * full binary path via bpf_program__attach_uprobe_opts.
10847  *
10848  * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
10849  * specified (and auto-attach is not possible) or the above format is specified for
10850  * auto-attach.
10851  */
10852 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10853 {
10854         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
10855         char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
10856         int n, ret = -EINVAL;
10857         long offset = 0;
10858
10859         *link = NULL;
10860
10861         n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
10862                    &probe_type, &binary_path, &func_name, &offset);
10863         switch (n) {
10864         case 1:
10865                 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
10866                 ret = 0;
10867                 break;
10868         case 2:
10869                 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
10870                         prog->name, prog->sec_name);
10871                 break;
10872         case 3:
10873         case 4:
10874                 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
10875                                 strcmp(probe_type, "uretprobe.s") == 0;
10876                 if (opts.retprobe && offset != 0) {
10877                         pr_warn("prog '%s': uretprobes do not support offset specification\n",
10878                                 prog->name);
10879                         break;
10880                 }
10881                 opts.func_name = func_name;
10882                 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
10883                 ret = libbpf_get_error(*link);
10884                 break;
10885         default:
10886                 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
10887                         prog->sec_name);
10888                 break;
10889         }
10890         free(probe_type);
10891         free(binary_path);
10892         free(func_name);
10893
10894         return ret;
10895 }
10896
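/* Examples of auto-attachable u[ret]probe section names handled above
 * (binary and function names are illustrative):
 *
 *    SEC("uprobe")                               // valid, but no auto-attach
 *    SEC("uprobe//usr/bin/bash:readline")
 *    SEC("uretprobe/libc.so.6:malloc")
 *    SEC("uprobe//usr/lib/libc.so.6:free+0x10")
 */
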
10897 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
10898                                             bool retprobe, pid_t pid,
10899                                             const char *binary_path,
10900                                             size_t func_offset)
10901 {
10902         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
10903
10904         return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
10905 }
10906
10907 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
10908                                           pid_t pid, const char *binary_path,
10909                                           const char *usdt_provider, const char *usdt_name,
10910                                           const struct bpf_usdt_opts *opts)
10911 {
10912         char resolved_path[512];
10913         struct bpf_object *obj = prog->obj;
10914         struct bpf_link *link;
10915         __u64 usdt_cookie;
10916         int err;
10917
10918         if (!OPTS_VALID(opts, bpf_usdt_opts))
10919                 return libbpf_err_ptr(-EINVAL);
10920
10921         if (bpf_program__fd(prog) < 0) {
10922                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10923                         prog->name);
10924                 return libbpf_err_ptr(-EINVAL);
10925         }
10926
10927         if (!binary_path)
10928                 return libbpf_err_ptr(-EINVAL);
10929
10930         if (!strchr(binary_path, '/')) {
10931                 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
10932                 if (err) {
10933                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
10934                                 prog->name, binary_path, err);
10935                         return libbpf_err_ptr(err);
10936                 }
10937                 binary_path = resolved_path;
10938         }
10939
10940         /* USDT manager is instantiated lazily on first USDT attach. It will
10941          * be destroyed together with BPF object in bpf_object__close().
10942          */
10943         if (IS_ERR(obj->usdt_man))
10944                 return libbpf_ptr(obj->usdt_man);
10945         if (!obj->usdt_man) {
10946                 obj->usdt_man = usdt_manager_new(obj);
10947                 if (IS_ERR(obj->usdt_man))
10948                         return libbpf_ptr(obj->usdt_man);
10949         }
10950
10951         usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
10952         link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
10953                                         usdt_provider, usdt_name, usdt_cookie);
10954         err = libbpf_get_error(link);
10955         if (err)
10956                 return libbpf_err_ptr(err);
10957         return link;
10958 }
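/* Usage sketch (illustrative; the provider/name pair and cookie value are
 * hypothetical examples):
 *
 *	LIBBPF_OPTS(bpf_usdt_opts, opts, .usdt_cookie = 42);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_usdt(prog, -1, "/usr/lib/libc.so.6",
 *					"libc", "setjmp", &opts);
 *	if (libbpf_get_error(link))
 *		// handle attach error
 */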
10959
10960 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10961 {
10962         char *path = NULL, *provider = NULL, *name = NULL;
10963         const char *sec_name;
10964         int n, err;
10965
10966         sec_name = bpf_program__section_name(prog);
10967         if (strcmp(sec_name, "usdt") == 0) {
10968                 /* no auto-attach for just SEC("usdt") */
10969                 *link = NULL;
10970                 return 0;
10971         }
10972
10973         n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
10974         if (n != 3) {
10975                 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
10976                         sec_name);
10977                 err = -EINVAL;
10978         } else {
10979                 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
10980                                                  provider, name, NULL);
10981                 err = libbpf_get_error(*link);
10982         }
10983         free(path);
10984         free(provider);
10985         free(name);
10986         return err;
10987 }
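/* Illustrative SEC() forms handled above; path, provider, and name are
 * hypothetical:
 *
 *	SEC("usdt")                                  - valid, but no auto-attach
 *	SEC("usdt//usr/lib/libc.so.6:libc:setjmp")   - auto-attachable
 */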
10988
10989 static int determine_tracepoint_id(const char *tp_category,
10990                                    const char *tp_name)
10991 {
10992         char file[PATH_MAX];
10993         int ret;
10994
10995         ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10996                        tracefs_path(), tp_category, tp_name);
10997         if (ret < 0)
10998                 return -errno;
10999         if (ret >= sizeof(file)) {
11000                 pr_debug("tracepoint %s/%s path is too long\n",
11001                          tp_category, tp_name);
11002                 return -E2BIG;
11003         }
11004         return parse_uint_from_file(file, "%d\n");
11005 }
11006
11007 static int perf_event_open_tracepoint(const char *tp_category,
11008                                       const char *tp_name)
11009 {
11010         const size_t attr_sz = sizeof(struct perf_event_attr);
11011         struct perf_event_attr attr;
11012         char errmsg[STRERR_BUFSIZE];
11013         int tp_id, pfd, err;
11014
11015         tp_id = determine_tracepoint_id(tp_category, tp_name);
11016         if (tp_id < 0) {
11017                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
11018                         tp_category, tp_name,
11019                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
11020                 return tp_id;
11021         }
11022
11023         memset(&attr, 0, attr_sz);
11024         attr.type = PERF_TYPE_TRACEPOINT;
11025         attr.size = attr_sz;
11026         attr.config = tp_id;
11027
11028         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
11029                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11030         if (pfd < 0) {
11031                 err = -errno;
11032                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
11033                         tp_category, tp_name,
11034                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11035                 return err;
11036         }
11037         return pfd;
11038 }
11039
11040 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
11041                                                      const char *tp_category,
11042                                                      const char *tp_name,
11043                                                      const struct bpf_tracepoint_opts *opts)
11044 {
11045         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11046         char errmsg[STRERR_BUFSIZE];
11047         struct bpf_link *link;
11048         int pfd, err;
11049
11050         if (!OPTS_VALID(opts, bpf_tracepoint_opts))
11051                 return libbpf_err_ptr(-EINVAL);
11052
11053         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11054
11055         pfd = perf_event_open_tracepoint(tp_category, tp_name);
11056         if (pfd < 0) {
11057                 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
11058                         prog->name, tp_category, tp_name,
11059                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11060                 return libbpf_err_ptr(pfd);
11061         }
11062         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11063         err = libbpf_get_error(link);
11064         if (err) {
11065                 close(pfd);
11066                 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
11067                         prog->name, tp_category, tp_name,
11068                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11069                 return libbpf_err_ptr(err);
11070         }
11071         return link;
11072 }
11073
11074 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
11075                                                 const char *tp_category,
11076                                                 const char *tp_name)
11077 {
11078         return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
11079 }
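/* Usage sketch (illustrative; category and name refer to entries under
 * tracefs events/ and are examples only):
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tracepoint(prog, "syscalls",
 *					      "sys_enter_openat");
 *	if (libbpf_get_error(link))
 *		// handle attach error
 */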
11080
11081 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11082 {
11083         char *sec_name, *tp_cat, *tp_name;
11084
11085         *link = NULL;
11086
11087         /* no auto-attach for SEC("tp") or SEC("tracepoint") */
11088         if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
11089                 return 0;
11090
11091         sec_name = strdup(prog->sec_name);
11092         if (!sec_name)
11093                 return -ENOMEM;
11094
11095         /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
11096         if (str_has_pfx(prog->sec_name, "tp/"))
11097                 tp_cat = sec_name + sizeof("tp/") - 1;
11098         else
11099                 tp_cat = sec_name + sizeof("tracepoint/") - 1;
11100         tp_name = strchr(tp_cat, '/');
11101         if (!tp_name) {
11102                 free(sec_name);
11103                 return -EINVAL;
11104         }
11105         *tp_name = '\0';
11106         tp_name++;
11107
11108         *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
11109         free(sec_name);
11110         return libbpf_get_error(*link);
11111 }
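/* Illustrative auto-attachable SEC() forms parsed above:
 *
 *	SEC("tp/syscalls/sys_enter_write")
 *	SEC("tracepoint/sched/sched_switch")
 */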
11112
11113 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
11114                                                     const char *tp_name)
11115 {
11116         char errmsg[STRERR_BUFSIZE];
11117         struct bpf_link *link;
11118         int prog_fd, pfd;
11119
11120         prog_fd = bpf_program__fd(prog);
11121         if (prog_fd < 0) {
11122                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11123                 return libbpf_err_ptr(-EINVAL);
11124         }
11125
11126         link = calloc(1, sizeof(*link));
11127         if (!link)
11128                 return libbpf_err_ptr(-ENOMEM);
11129         link->detach = &bpf_link__detach_fd;
11130
11131         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
11132         if (pfd < 0) {
11133                 pfd = -errno;
11134                 free(link);
11135                 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
11136                         prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11137                 return libbpf_err_ptr(pfd);
11138         }
11139         link->fd = pfd;
11140         return link;
11141 }
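/* Usage sketch (illustrative; the raw tracepoint name is an example):
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
 *	if (libbpf_get_error(link))
 *		// handle attach error
 */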
11142
11143 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11144 {
11145         static const char *const prefixes[] = {
11146                 "raw_tp",
11147                 "raw_tracepoint",
11148                 "raw_tp.w",
11149                 "raw_tracepoint.w",
11150         };
11151         size_t i;
11152         const char *tp_name = NULL;
11153
11154         *link = NULL;
11155
11156         for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
11157                 size_t pfx_len;
11158
11159                 if (!str_has_pfx(prog->sec_name, prefixes[i]))
11160                         continue;
11161
11162                 pfx_len = strlen(prefixes[i]);
11163                 /* no auto-attach for a bare prefix, e.g., SEC("raw_tp") */
11164                 if (prog->sec_name[pfx_len] == '\0')
11165                         return 0;
11166
11167                 if (prog->sec_name[pfx_len] != '/')
11168                         continue;
11169
11170                 tp_name = prog->sec_name + pfx_len + 1;
11171                 break;
11172         }
11173
11174         if (!tp_name) {
11175                 pr_warn("prog '%s': invalid section name '%s'\n",
11176                         prog->name, prog->sec_name);
11177                 return -EINVAL;
11178         }
11179
11180         *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
11181         return libbpf_get_error(*link);
11182 }
11183
11184 /* Common logic for all BPF program types that attach to a btf_id */
11185 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
11186                                                    const struct bpf_trace_opts *opts)
11187 {
11188         LIBBPF_OPTS(bpf_link_create_opts, link_opts);
11189         char errmsg[STRERR_BUFSIZE];
11190         struct bpf_link *link;
11191         int prog_fd, pfd;
11192
11193         if (!OPTS_VALID(opts, bpf_trace_opts))
11194                 return libbpf_err_ptr(-EINVAL);
11195
11196         prog_fd = bpf_program__fd(prog);
11197         if (prog_fd < 0) {
11198                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11199                 return libbpf_err_ptr(-EINVAL);
11200         }
11201
11202         link = calloc(1, sizeof(*link));
11203         if (!link)
11204                 return libbpf_err_ptr(-ENOMEM);
11205         link->detach = &bpf_link__detach_fd;
11206
11207         /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
11208         link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
11209         pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
11210         if (pfd < 0) {
11211                 pfd = -errno;
11212                 free(link);
11213                 pr_warn("prog '%s': failed to attach: %s\n",
11214                         prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11215                 return libbpf_err_ptr(pfd);
11216         }
11217         link->fd = pfd;
11218         return link;
11219 }
11220
11221 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
11222 {
11223         return bpf_program__attach_btf_id(prog, NULL);
11224 }
11225
11226 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
11227                                                 const struct bpf_trace_opts *opts)
11228 {
11229         return bpf_program__attach_btf_id(prog, opts);
11230 }
11231
11232 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
11233 {
11234         return bpf_program__attach_btf_id(prog, NULL);
11235 }
11236
11237 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11238 {
11239         *link = bpf_program__attach_trace(prog);
11240         return libbpf_get_error(*link);
11241 }
11242
11243 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11244 {
11245         *link = bpf_program__attach_lsm(prog);
11246         return libbpf_get_error(*link);
11247 }
11248
11249 static struct bpf_link *
11250 bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
11251                        const char *target_name)
11252 {
11253         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
11254                             .target_btf_id = btf_id);
11255         enum bpf_attach_type attach_type;
11256         char errmsg[STRERR_BUFSIZE];
11257         struct bpf_link *link;
11258         int prog_fd, link_fd;
11259
11260         prog_fd = bpf_program__fd(prog);
11261         if (prog_fd < 0) {
11262                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11263                 return libbpf_err_ptr(-EINVAL);
11264         }
11265
11266         link = calloc(1, sizeof(*link));
11267         if (!link)
11268                 return libbpf_err_ptr(-ENOMEM);
11269         link->detach = &bpf_link__detach_fd;
11270
11271         attach_type = bpf_program__expected_attach_type(prog);
11272         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
11273         if (link_fd < 0) {
11274                 link_fd = -errno;
11275                 free(link);
11276                 pr_warn("prog '%s': failed to attach to %s: %s\n",
11277                         prog->name, target_name,
11278                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11279                 return libbpf_err_ptr(link_fd);
11280         }
11281         link->fd = link_fd;
11282         return link;
11283 }
11284
11285 struct bpf_link *
11286 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
11287 {
11288         return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
11289 }
11290
11291 struct bpf_link *
11292 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
11293 {
11294         return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
11295 }
11296
11297 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
11298 {
11299         /* target_fd/target_ifindex use the same field in LINK_CREATE */
11300         return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
11301 }
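/* Usage sketch (illustrative; the interface name is an example and
 * if_nametoindex() needs <net/if.h>):
 *
 *	struct bpf_link *link;
 *	int ifindex = if_nametoindex("eth0");
 *
 *	link = bpf_program__attach_xdp(prog, ifindex);
 *	if (libbpf_get_error(link))
 *		// handle attach error
 */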
11302
11303 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
11304                                               int target_fd,
11305                                               const char *attach_func_name)
11306 {
11307         int btf_id;
11308
11309         if (!!target_fd != !!attach_func_name) {
11310                 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
11311                         prog->name);
11312                 return libbpf_err_ptr(-EINVAL);
11313         }
11314
11315         if (prog->type != BPF_PROG_TYPE_EXT) {
11316                 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
11317                         prog->name);
11318                 return libbpf_err_ptr(-EINVAL);
11319         }
11320
11321         if (target_fd) {
11322                 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
11323                 if (btf_id < 0)
11324                         return libbpf_err_ptr(btf_id);
11325
11326                 return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
11327         } else {
11328                 /* no target, so use raw_tracepoint_open for compatibility
11329                  * with old kernels
11330                  */
11331                 return bpf_program__attach_trace(prog);
11332         }
11333 }
11334
11335 struct bpf_link *
11336 bpf_program__attach_iter(const struct bpf_program *prog,
11337                          const struct bpf_iter_attach_opts *opts)
11338 {
11339         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
11340         char errmsg[STRERR_BUFSIZE];
11341         struct bpf_link *link;
11342         int prog_fd, link_fd;
11343         __u32 target_fd = 0;
11344
11345         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
11346                 return libbpf_err_ptr(-EINVAL);
11347
11348         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
11349         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
11350
11351         prog_fd = bpf_program__fd(prog);
11352         if (prog_fd < 0) {
11353                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11354                 return libbpf_err_ptr(-EINVAL);
11355         }
11356
11357         link = calloc(1, sizeof(*link));
11358         if (!link)
11359                 return libbpf_err_ptr(-ENOMEM);
11360         link->detach = &bpf_link__detach_fd;
11361
11362         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
11363                                   &link_create_opts);
11364         if (link_fd < 0) {
11365                 link_fd = -errno;
11366                 free(link);
11367                 pr_warn("prog '%s': failed to attach to iterator: %s\n",
11368                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11369                 return libbpf_err_ptr(link_fd);
11370         }
11371         link->fd = link_fd;
11372         return link;
11373 }
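/* Usage sketch (illustrative): parameterizing a map-element iterator via
 * union bpf_iter_link_info from <linux/bpf.h>; map_fd is assumed to be a
 * valid BPF map FD:
 *
 *	union bpf_iter_link_info linfo = {};
 *	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 *	struct bpf_link *link;
 *
 *	linfo.map.map_fd = map_fd;
 *	opts.link_info = &linfo;
 *	opts.link_info_len = sizeof(linfo);
 *	link = bpf_program__attach_iter(prog, &opts);
 *	if (libbpf_get_error(link))
 *		// handle attach error
 */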
11374
11375 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11376 {
11377         *link = bpf_program__attach_iter(prog, NULL);
11378         return libbpf_get_error(*link);
11379 }
11380
11381 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
11382 {
11383         struct bpf_link *link = NULL;
11384         int err;
11385
11386         if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
11387                 return libbpf_err_ptr(-EOPNOTSUPP);
11388
11389         err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
11390         if (err)
11391                 return libbpf_err_ptr(err);
11392
11393         /* When calling bpf_program__attach() explicitly, auto-attach support
11394          * is expected to work, so NULL returned link is considered an error.
11395          * This is different for skeleton's attach, see comment in
11396          * bpf_object__attach_skeleton().
11397          */
11398         if (!link)
11399                 return libbpf_err_ptr(-EOPNOTSUPP);
11400
11401         return link;
11402 }
11403
11404 static int bpf_link__detach_struct_ops(struct bpf_link *link)
11405 {
11406         __u32 zero = 0;
11407
11408         if (bpf_map_delete_elem(link->fd, &zero))
11409                 return -errno;
11410
11411         return 0;
11412 }
11413
11414 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
11415 {
11416         struct bpf_struct_ops *st_ops;
11417         struct bpf_link *link;
11418         __u32 i, zero = 0;
11419         int err;
11420
11421         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
11422                 return libbpf_err_ptr(-EINVAL);
11423
11424         link = calloc(1, sizeof(*link));
11425         if (!link)
11426                 return libbpf_err_ptr(-ENOMEM);
11427
11428         st_ops = map->st_ops;
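        /* fill in FDs of all implemented struct_ops program slots in the
         * kernel-side data image before writing it into the map value
         */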
11429         for (i = 0; i < btf_vlen(st_ops->type); i++) {
11430                 struct bpf_program *prog = st_ops->progs[i];
11431                 void *kern_data;
11432                 int prog_fd;
11433
11434                 if (!prog)
11435                         continue;
11436
11437                 prog_fd = bpf_program__fd(prog);
11438                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
11439                 *(unsigned long *)kern_data = prog_fd;
11440         }
11441
11442         err = bpf_map_update_elem(map->fd, &zero, st_ops->kern_vdata, 0);
11443         if (err) {
11444                 err = -errno;
11445                 free(link);
11446                 return libbpf_err_ptr(err);
11447         }
11448
11449         link->detach = bpf_link__detach_struct_ops;
11450         link->fd = map->fd;
11451
11452         return link;
11453 }
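/* Usage sketch (illustrative; the struct_ops map name is hypothetical):
 *
 *	struct bpf_map *map;
 *	struct bpf_link *link;
 *
 *	map = bpf_object__find_map_by_name(obj, "my_ops");
 *	link = bpf_map__attach_struct_ops(map);
 *	if (libbpf_get_error(link))
 *		// handle attach error
 */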
11454
11455 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
11456                                                           void *private_data);
11457
11458 static enum bpf_perf_event_ret
11459 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
11460                        void **copy_mem, size_t *copy_size,
11461                        bpf_perf_event_print_t fn, void *private_data)
11462 {
11463         struct perf_event_mmap_page *header = mmap_mem;
11464         __u64 data_head = ring_buffer_read_head(header);
11465         __u64 data_tail = header->data_tail;
11466         void *base = ((__u8 *)header) + page_size;
11467         int ret = LIBBPF_PERF_EVENT_CONT;
11468         struct perf_event_header *ehdr;
11469         size_t ehdr_size;
11470
11471         while (data_head != data_tail) {
11472                 ehdr = base + (data_tail & (mmap_size - 1));
11473                 ehdr_size = ehdr->size;
11474
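                /* the record wraps past the end of the ring buffer, so
                 * reassemble it into a contiguous heap buffer before
                 * handing it to the callback below
                 */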
11475                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
11476                         void *copy_start = ehdr;
11477                         size_t len_first = base + mmap_size - copy_start;
11478                         size_t len_secnd = ehdr_size - len_first;
11479
11480                         if (*copy_size < ehdr_size) {
11481                                 free(*copy_mem);
11482                                 *copy_mem = malloc(ehdr_size);
11483                                 if (!*copy_mem) {
11484                                         *copy_size = 0;
11485                                         ret = LIBBPF_PERF_EVENT_ERROR;
11486                                         break;
11487                                 }
11488                                 *copy_size = ehdr_size;
11489                         }
11490
11491                         memcpy(*copy_mem, copy_start, len_first);
11492                         memcpy(*copy_mem + len_first, base, len_secnd);
11493                         ehdr = *copy_mem;
11494                 }
11495
11496                 ret = fn(ehdr, private_data);
11497                 data_tail += ehdr_size;
11498                 if (ret != LIBBPF_PERF_EVENT_CONT)
11499                         break;
11500         }
11501
11502         ring_buffer_write_tail(header, data_tail);
11503         return libbpf_err(ret);
11504 }
11505
11506 struct perf_buffer;
11507
11508 struct perf_buffer_params {
11509         struct perf_event_attr *attr;
11510         /* if event_cb is specified, it takes precedence */
11511         perf_buffer_event_fn event_cb;
11512         /* sample_cb and lost_cb are higher-level common-case callbacks */
11513         perf_buffer_sample_fn sample_cb;
11514         perf_buffer_lost_fn lost_cb;
11515         void *ctx;
11516         int cpu_cnt;
11517         int *cpus;
11518         int *map_keys;
11519 };
11520
11521 struct perf_cpu_buf {
11522         struct perf_buffer *pb;
11523         void *base; /* mmap()'ed memory */
11524         void *buf; /* for reconstructing segmented data */
11525         size_t buf_size;
11526         int fd;
11527         int cpu;
11528         int map_key;
11529 };
11530
11531 struct perf_buffer {
11532         perf_buffer_event_fn event_cb;
11533         perf_buffer_sample_fn sample_cb;
11534         perf_buffer_lost_fn lost_cb;
11535         void *ctx; /* passed into callbacks */
11536
11537         size_t page_size;
11538         size_t mmap_size;
11539         struct perf_cpu_buf **cpu_bufs;
11540         struct epoll_event *events;
11541         int cpu_cnt; /* number of allocated CPU buffers */
11542         int epoll_fd; /* epoll instance FD used to poll perf event FDs */
11543         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
11544 };
11545
11546 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
11547                                       struct perf_cpu_buf *cpu_buf)
11548 {
11549         if (!cpu_buf)
11550                 return;
11551         if (cpu_buf->base &&
11552             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
11553                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
11554         if (cpu_buf->fd >= 0) {
11555                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
11556                 close(cpu_buf->fd);
11557         }
11558         free(cpu_buf->buf);
11559         free(cpu_buf);
11560 }
11561
11562 void perf_buffer__free(struct perf_buffer *pb)
11563 {
11564         int i;
11565
11566         if (IS_ERR_OR_NULL(pb))
11567                 return;
11568         if (pb->cpu_bufs) {
11569                 for (i = 0; i < pb->cpu_cnt; i++) {
11570                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11571
11572                         if (!cpu_buf)
11573                                 continue;
11574
11575                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
11576                         perf_buffer__free_cpu_buf(pb, cpu_buf);
11577                 }
11578                 free(pb->cpu_bufs);
11579         }
11580         if (pb->epoll_fd >= 0)
11581                 close(pb->epoll_fd);
11582         free(pb->events);
11583         free(pb);
11584 }
11585
11586 static struct perf_cpu_buf *
11587 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
11588                           int cpu, int map_key)
11589 {
11590         struct perf_cpu_buf *cpu_buf;
11591         char msg[STRERR_BUFSIZE];
11592         int err;
11593
11594         cpu_buf = calloc(1, sizeof(*cpu_buf));
11595         if (!cpu_buf)
11596                 return ERR_PTR(-ENOMEM);
11597
11598         cpu_buf->pb = pb;
11599         cpu_buf->cpu = cpu;
11600         cpu_buf->map_key = map_key;
11601
11602         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
11603                               -1, PERF_FLAG_FD_CLOEXEC);
11604         if (cpu_buf->fd < 0) {
11605                 err = -errno;
11606                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
11607                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11608                 goto error;
11609         }
11610
11611         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
11612                              PROT_READ | PROT_WRITE, MAP_SHARED,
11613                              cpu_buf->fd, 0);
11614         if (cpu_buf->base == MAP_FAILED) {
11615                 cpu_buf->base = NULL;
11616                 err = -errno;
11617                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
11618                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11619                 goto error;
11620         }
11621
11622         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11623                 err = -errno;
11624                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
11625                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11626                 goto error;
11627         }
11628
11629         return cpu_buf;
11630
11631 error:
11632         perf_buffer__free_cpu_buf(pb, cpu_buf);
11633         return (struct perf_cpu_buf *)ERR_PTR(err);
11634 }
11635
11636 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11637                                               struct perf_buffer_params *p);
11638
11639 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
11640                                      perf_buffer_sample_fn sample_cb,
11641                                      perf_buffer_lost_fn lost_cb,
11642                                      void *ctx,
11643                                      const struct perf_buffer_opts *opts)
11644 {
11645         const size_t attr_sz = sizeof(struct perf_event_attr);
11646         struct perf_buffer_params p = {};
11647         struct perf_event_attr attr;
11648
11649         if (!OPTS_VALID(opts, perf_buffer_opts))
11650                 return libbpf_err_ptr(-EINVAL);
11651
11652         memset(&attr, 0, attr_sz);
11653         attr.size = attr_sz;
11654         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
11655         attr.type = PERF_TYPE_SOFTWARE;
11656         attr.sample_type = PERF_SAMPLE_RAW;
11657         attr.sample_period = 1;
11658         attr.wakeup_events = 1;
11659
11660         p.attr = &attr;
11661         p.sample_cb = sample_cb;
11662         p.lost_cb = lost_cb;
11663         p.ctx = ctx;
11664
11665         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11666 }
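/* Usage sketch (illustrative; on_sample and map_fd are hypothetical and
 * page_cnt must be a power of two):
 *
 *	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// consume one raw sample
 *	}
 *
 *	struct perf_buffer *pb;
 *	int err;
 *
 *	pb = perf_buffer__new(map_fd, 8, on_sample, NULL, NULL, NULL);
 *	if (libbpf_get_error(pb))
 *		// handle setup error
 *	while ((err = perf_buffer__poll(pb, 100)) >= 0)
 *		; // keep processing until error or interruption
 *	perf_buffer__free(pb);
 */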
11667
11668 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
11669                                          struct perf_event_attr *attr,
11670                                          perf_buffer_event_fn event_cb, void *ctx,
11671                                          const struct perf_buffer_raw_opts *opts)
11672 {
11673         struct perf_buffer_params p = {};
11674
11675         if (!attr)
11676                 return libbpf_err_ptr(-EINVAL);
11677
11678         if (!OPTS_VALID(opts, perf_buffer_raw_opts))
11679                 return libbpf_err_ptr(-EINVAL);
11680
11681         p.attr = attr;
11682         p.event_cb = event_cb;
11683         p.ctx = ctx;
11684         p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
11685         p.cpus = OPTS_GET(opts, cpus, NULL);
11686         p.map_keys = OPTS_GET(opts, map_keys, NULL);
11687
11688         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
11689 }
11690
11691 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
11692                                               struct perf_buffer_params *p)
11693 {
11694         const char *online_cpus_file = "/sys/devices/system/cpu/online";
11695         struct bpf_map_info map;
11696         char msg[STRERR_BUFSIZE];
11697         struct perf_buffer *pb;
11698         bool *online = NULL;
11699         __u32 map_info_len;
11700         int err, i, j, n;
11701
11702         if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
11703                 pr_warn("page count should be a power of two, but is %zu\n",
11704                         page_cnt);
11705                 return ERR_PTR(-EINVAL);
11706         }
11707
11708         /* best-effort sanity checks */
11709         memset(&map, 0, sizeof(map));
11710         map_info_len = sizeof(map);
11711         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
11712         if (err) {
11713                 err = -errno;
11714                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
11715                  * -EBADFD, -EFAULT, or -E2BIG on real error
11716                  */
11717                 if (err != -EINVAL) {
11718                         pr_warn("failed to get map info for map FD %d: %s\n",
11719                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
11720                         return ERR_PTR(err);
11721                 }
11722                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
11723                          map_fd);
11724         } else {
11725                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
11726                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
11727                                 map.name);
11728                         return ERR_PTR(-EINVAL);
11729                 }
11730         }
11731
11732         pb = calloc(1, sizeof(*pb));
11733         if (!pb)
11734                 return ERR_PTR(-ENOMEM);
11735
11736         pb->event_cb = p->event_cb;
11737         pb->sample_cb = p->sample_cb;
11738         pb->lost_cb = p->lost_cb;
11739         pb->ctx = p->ctx;
11740
11741         pb->page_size = getpagesize();
11742         pb->mmap_size = pb->page_size * page_cnt;
11743         pb->map_fd = map_fd;
11744
11745         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
11746         if (pb->epoll_fd < 0) {
11747                 err = -errno;
11748                 pr_warn("failed to create epoll instance: %s\n",
11749                         libbpf_strerror_r(err, msg, sizeof(msg)));
11750                 goto error;
11751         }
11752
11753         if (p->cpu_cnt > 0) {
11754                 pb->cpu_cnt = p->cpu_cnt;
11755         } else {
11756                 pb->cpu_cnt = libbpf_num_possible_cpus();
11757                 if (pb->cpu_cnt < 0) {
11758                         err = pb->cpu_cnt;
11759                         goto error;
11760                 }
11761                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
11762                         pb->cpu_cnt = map.max_entries;
11763         }
11764
11765         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
11766         if (!pb->events) {
11767                 err = -ENOMEM;
11768                 pr_warn("failed to allocate events: out of memory\n");
11769                 goto error;
11770         }
11771         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
11772         if (!pb->cpu_bufs) {
11773                 err = -ENOMEM;
11774                 pr_warn("failed to allocate buffers: out of memory\n");
11775                 goto error;
11776         }
11777
11778         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
11779         if (err) {
11780                 pr_warn("failed to get online CPU mask: %d\n", err);
11781                 goto error;
11782         }
11783
11784         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
11785                 struct perf_cpu_buf *cpu_buf;
11786                 int cpu, map_key;
11787
11788                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
11789                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
11790
11791                 /* in case user didn't explicitly request particular CPUs to be
11792                  * attached to, skip offline/not-present CPUs
11793                  */
11794                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
11795                         continue;
11796
11797                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
11798                 if (IS_ERR(cpu_buf)) {
11799                         err = PTR_ERR(cpu_buf);
11800                         goto error;
11801                 }
11802
11803                 pb->cpu_bufs[j] = cpu_buf;
11804
11805                 err = bpf_map_update_elem(pb->map_fd, &map_key,
11806                                           &cpu_buf->fd, 0);
11807                 if (err) {
11808                         err = -errno;
11809                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
11810                                 cpu, map_key, cpu_buf->fd,
11811                                 libbpf_strerror_r(err, msg, sizeof(msg)));
11812                         goto error;
11813                 }
11814
11815                 pb->events[j].events = EPOLLIN;
11816                 pb->events[j].data.ptr = cpu_buf;
11817                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
11818                               &pb->events[j]) < 0) {
11819                         err = -errno;
11820                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
11821                                 cpu, cpu_buf->fd,
11822                                 libbpf_strerror_r(err, msg, sizeof(msg)));
11823                         goto error;
11824                 }
11825                 j++;
11826         }
11827         pb->cpu_cnt = j;
11828         free(online);
11829
11830         return pb;
11831
11832 error:
11833         free(online);
11834         if (pb)
11835                 perf_buffer__free(pb);
11836         return ERR_PTR(err);
11837 }
11838
11839 struct perf_sample_raw {
11840         struct perf_event_header header;
11841         uint32_t size;
11842         char data[];
11843 };
11844
11845 struct perf_sample_lost {
11846         struct perf_event_header header;
11847         uint64_t id;
11848         uint64_t lost;
11849         uint64_t sample_id;
11850 };
11851
11852 static enum bpf_perf_event_ret
11853 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
11854 {
11855         struct perf_cpu_buf *cpu_buf = ctx;
11856         struct perf_buffer *pb = cpu_buf->pb;
11857         void *data = e;
11858
11859         /* user wants full control over parsing perf event */
11860         if (pb->event_cb)
11861                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
11862
11863         switch (e->type) {
11864         case PERF_RECORD_SAMPLE: {
11865                 struct perf_sample_raw *s = data;
11866
11867                 if (pb->sample_cb)
11868                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
11869                 break;
11870         }
11871         case PERF_RECORD_LOST: {
11872                 struct perf_sample_lost *s = data;
11873
11874                 if (pb->lost_cb)
11875                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
11876                 break;
11877         }
11878         default:
11879                 pr_warn("unknown perf sample type %d\n", e->type);
11880                 return LIBBPF_PERF_EVENT_ERROR;
11881         }
11882         return LIBBPF_PERF_EVENT_CONT;
11883 }
11884
11885 static int perf_buffer__process_records(struct perf_buffer *pb,
11886                                         struct perf_cpu_buf *cpu_buf)
11887 {
11888         enum bpf_perf_event_ret ret;
11889
11890         ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
11891                                      pb->page_size, &cpu_buf->buf,
11892                                      &cpu_buf->buf_size,
11893                                      perf_buffer__process_record, cpu_buf);
11894         if (ret != LIBBPF_PERF_EVENT_CONT)
11895                 return ret;
11896         return 0;
11897 }
11898
11899 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
11900 {
11901         return pb->epoll_fd;
11902 }
11903
11904 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
11905 {
11906         int i, cnt, err;
11907
11908         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
11909         if (cnt < 0)
11910                 return -errno;
11911
11912         for (i = 0; i < cnt; i++) {
11913                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
11914
11915                 err = perf_buffer__process_records(pb, cpu_buf);
11916                 if (err) {
11917                         pr_warn("error while processing records: %d\n", err);
11918                         return libbpf_err(err);
11919                 }
11920         }
11921         return cnt;
11922 }
11923
11924 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
11925  * manager.
11926  */
11927 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
11928 {
11929         return pb->cpu_cnt;
11930 }
11931
11932 /*
11933  * Return perf_event FD of a ring buffer in *buf_idx* slot of
11934  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
11935  * select()/poll()/epoll() Linux syscalls.
11936  */
11937 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
11938 {
11939         struct perf_cpu_buf *cpu_buf;
11940
11941         if (buf_idx >= pb->cpu_cnt)
11942                 return libbpf_err(-EINVAL);
11943
11944         cpu_buf = pb->cpu_bufs[buf_idx];
11945         if (!cpu_buf)
11946                 return libbpf_err(-ENOENT);
11947
11948         return cpu_buf->fd;
11949 }
11950
11951 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
11952 {
11953         struct perf_cpu_buf *cpu_buf;
11954
11955         if (buf_idx >= pb->cpu_cnt)
11956                 return libbpf_err(-EINVAL);
11957
11958         cpu_buf = pb->cpu_bufs[buf_idx];
11959         if (!cpu_buf)
11960                 return libbpf_err(-ENOENT);
11961
11962         *buf = cpu_buf->base;
11963         *buf_size = pb->mmap_size;
11964         return 0;
11965 }
11966
11967 /*
11968  * Consume data from perf ring buffer corresponding to slot *buf_idx* in
11969  * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
11970  * consume, do nothing and return success.
11971  * Returns:
11972  *   - 0 on success;
11973  *   - <0 on failure.
11974  */
11975 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
11976 {
11977         struct perf_cpu_buf *cpu_buf;
11978
11979         if (buf_idx >= pb->cpu_cnt)
11980                 return libbpf_err(-EINVAL);
11981
11982         cpu_buf = pb->cpu_bufs[buf_idx];
11983         if (!cpu_buf)
11984                 return libbpf_err(-ENOENT);
11985
11986         return perf_buffer__process_records(pb, cpu_buf);
11987 }
11988
11989 int perf_buffer__consume(struct perf_buffer *pb)
11990 {
11991         int i, err;
11992
11993         for (i = 0; i < pb->cpu_cnt; i++) {
11994                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11995
11996                 if (!cpu_buf)
11997                         continue;
11998
11999                 err = perf_buffer__process_records(pb, cpu_buf);
12000                 if (err) {
12001                         pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
12002                         return libbpf_err(err);
12003                 }
12004         }
12005         return 0;
12006 }
12007
12008 int bpf_program__set_attach_target(struct bpf_program *prog,
12009                                    int attach_prog_fd,
12010                                    const char *attach_func_name)
12011 {
12012         int btf_obj_fd = 0, btf_id = 0, err;
12013
12014         if (!prog || attach_prog_fd < 0)
12015                 return libbpf_err(-EINVAL);
12016
12017         if (prog->obj->loaded)
12018                 return libbpf_err(-EINVAL);
12019
12020         if (attach_prog_fd && !attach_func_name) {
12021                 /* remember attach_prog_fd and let bpf_program__load() find
12022                  * BTF ID during the program load
12023                  */
12024                 prog->attach_prog_fd = attach_prog_fd;
12025                 return 0;
12026         }
12027
12028         if (attach_prog_fd) {
12029                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
12030                                                  attach_prog_fd);
12031                 if (btf_id < 0)
12032                         return libbpf_err(btf_id);
12033         } else {
12034                 if (!attach_func_name)
12035                         return libbpf_err(-EINVAL);
12036
12037                 /* load btf_vmlinux, if not yet */
12038                 err = bpf_object__load_vmlinux_btf(prog->obj, true);
12039                 if (err)
12040                         return libbpf_err(err);
12041                 err = find_kernel_btf_id(prog->obj, attach_func_name,
12042                                          prog->expected_attach_type,
12043                                          &btf_obj_fd, &btf_id);
12044                 if (err)
12045                         return libbpf_err(err);
12046         }
12047
12048         prog->attach_btf_id = btf_id;
12049         prog->attach_btf_obj_fd = btf_obj_fd;
12050         prog->attach_prog_fd = attach_prog_fd;
12051         return 0;
12052 }
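/* Usage sketch (illustrative): retargeting, e.g., a fentry program to a
 * different kernel function before the object is loaded; the function name
 * is an example:
 *
 *	int err;
 *
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	if (err)
 *		// handle error
 */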
12053
12054 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
12055 {
12056         int err = 0, n, len, start, end = -1;
12057         bool *tmp;
12058
12059         *mask = NULL;
12060         *mask_sz = 0;
12061
12062         /* Each substring separated by ',' has format \d+-\d+ or \d+ */
12063         while (*s) {
12064                 if (*s == ',' || *s == '\n') {
12065                         s++;
12066                         continue;
12067                 }
12068                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
12069                 if (n <= 0 || n > 2) {
12070                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
12071                         err = -EINVAL;
12072                         goto cleanup;
12073                 } else if (n == 1) {
12074                         end = start;
12075                 }
12076                 if (start < 0 || start > end) {
12077                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
12078                                 start, end, s);
12079                         err = -EINVAL;
12080                         goto cleanup;
12081                 }
12082                 tmp = realloc(*mask, end + 1);
12083                 if (!tmp) {
12084                         err = -ENOMEM;
12085                         goto cleanup;
12086                 }
12087                 *mask = tmp;
12088                 memset(tmp + *mask_sz, 0, start - *mask_sz);
12089                 memset(tmp + start, 1, end - start + 1);
12090                 *mask_sz = end + 1;
12091                 s += len;
12092         }
12093         if (!*mask_sz) {
12094                 pr_warn("Empty CPU range\n");
12095                 return -EINVAL;
12096         }
12097         return 0;
12098 cleanup:
12099         free(*mask);
12100         *mask = NULL;
12101         return err;
12102 }
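/* Example (illustrative): parsing "0-2,7" produces an 8-element mask with
 * entries 0, 1, 2, and 7 set:
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-2,7", &mask, &n);
 *	// on success, n == 8; mask[0..2] and mask[7] are true
 *	free(mask);
 */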
12103
12104 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
12105 {
12106         int fd, err = 0, len;
12107         char buf[128];
12108
12109         fd = open(fcpu, O_RDONLY | O_CLOEXEC);
12110         if (fd < 0) {
12111                 err = -errno;
12112                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
12113                 return err;
12114         }
12115         len = read(fd, buf, sizeof(buf));
12116         close(fd);
12117         if (len <= 0) {
12118                 err = len ? -errno : -EINVAL;
12119                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
12120                 return err;
12121         }
12122         if (len >= sizeof(buf)) {
12123                 pr_warn("CPU mask is too big in file %s\n", fcpu);
12124                 return -E2BIG;
12125         }
12126         buf[len] = '\0';
12127
12128         return parse_cpu_mask_str(buf, mask, mask_sz);
12129 }
12130
12131 int libbpf_num_possible_cpus(void)
12132 {
12133         static const char *fcpu = "/sys/devices/system/cpu/possible";
12134         static int cpus;
12135         int err, n, i, tmp_cpus;
12136         bool *mask;
12137
12138         tmp_cpus = READ_ONCE(cpus);
12139         if (tmp_cpus > 0)
12140                 return tmp_cpus;
12141
12142         err = parse_cpu_mask_file(fcpu, &mask, &n);
12143         if (err)
12144                 return libbpf_err(err);
12145
12146         tmp_cpus = 0;
12147         for (i = 0; i < n; i++) {
12148                 if (mask[i])
12149                         tmp_cpus++;
12150         }
12151         free(mask);
12152
12153         WRITE_ONCE(cpus, tmp_cpus);
12154         return tmp_cpus;
12155 }
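/* Usage sketch:
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *
 *	if (ncpus < 0)
 *		// handle error
 */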
12156
12157 static int populate_skeleton_maps(const struct bpf_object *obj,
12158                                   struct bpf_map_skeleton *maps,
12159                                   size_t map_cnt)
12160 {
12161         int i;
12162
12163         for (i = 0; i < map_cnt; i++) {
12164                 struct bpf_map **map = maps[i].map;
12165                 const char *name = maps[i].name;
12166                 void **mmaped = maps[i].mmaped;
12167
12168                 *map = bpf_object__find_map_by_name(obj, name);
12169                 if (!*map) {
12170                         pr_warn("failed to find skeleton map '%s'\n", name);
12171                         return -ESRCH;
12172                 }
12173
12174                 /* externs shouldn't be pre-setup from user code */
12175                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
12176                         *mmaped = (*map)->mmaped;
12177         }
12178         return 0;
12179 }
12180
12181 static int populate_skeleton_progs(const struct bpf_object *obj,
12182                                    struct bpf_prog_skeleton *progs,
12183                                    size_t prog_cnt)
12184 {
12185         int i;
12186
12187         for (i = 0; i < prog_cnt; i++) {
12188                 struct bpf_program **prog = progs[i].prog;
12189                 const char *name = progs[i].name;
12190
12191                 *prog = bpf_object__find_program_by_name(obj, name);
12192                 if (!*prog) {
12193                         pr_warn("failed to find skeleton program '%s'\n", name);
12194                         return -ESRCH;
12195                 }
12196         }
12197         return 0;
12198 }
12199
12200 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
12201                               const struct bpf_object_open_opts *opts)
12202 {
12203         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
12204                 .object_name = s->name,
12205         );
12206         struct bpf_object *obj;
12207         int err;
12208
12209         /* Attempt to preserve opts->object_name, unless overridden by user
12210          * explicitly. Overwriting the object name for skeletons is discouraged,
12211          * as it breaks global data maps, because they contain the object name
12212          * prefix as their own map name prefix. When the skeleton is generated,
12213          * bpftool assumes that this name will stay the same.
12214          */
12215         if (opts) {
12216                 memcpy(&skel_opts, opts, sizeof(*opts));
12217                 if (!opts->object_name)
12218                         skel_opts.object_name = s->name;
12219         }
12220
12221         obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
12222         err = libbpf_get_error(obj);
12223         if (err) {
12224                 pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
12225                         s->name, err);
12226                 return libbpf_err(err);
12227         }
12228
12229         *s->obj = obj;
12230         err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
12231         if (err) {
12232                 pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
12233                 return libbpf_err(err);
12234         }
12235
12236         err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
12237         if (err) {
12238                 pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
12239                 return libbpf_err(err);
12240         }
12241
12242         return 0;
12243 }
12244
12245 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
12246 {
12247         int err, len, var_idx, i;
12248         const char *var_name;
12249         const struct bpf_map *map;
12250         struct btf *btf;
12251         __u32 map_type_id;
12252         const struct btf_type *map_type, *var_type;
12253         const struct bpf_var_skeleton *var_skel;
12254         struct btf_var_secinfo *var;
12255
12256         if (!s->obj)
12257                 return libbpf_err(-EINVAL);
12258
12259         btf = bpf_object__btf(s->obj);
12260         if (!btf) {
12261                 pr_warn("subskeletons require BTF at runtime (object %s)\n",
12262                         bpf_object__name(s->obj));
12263                 return libbpf_err(-errno);
12264         }
12265
12266         err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
12267         if (err) {
12268                 pr_warn("failed to populate subskeleton maps: %d\n", err);
12269                 return libbpf_err(err);
12270         }
12271
12272         err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
12273         if (err) {
12274                 pr_warn("failed to populate subskeleton progs: %d\n", err);
12275                 return libbpf_err(err);
12276         }
12277
12278         for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
12279                 var_skel = &s->vars[var_idx];
12280                 map = *var_skel->map;
12281                 map_type_id = bpf_map__btf_value_type_id(map);
12282                 map_type = btf__type_by_id(btf, map_type_id);
12283
12284                 if (!btf_is_datasec(map_type)) {
12285                         pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
12286                                 bpf_map__name(map),
12287                                 __btf_kind_str(btf_kind(map_type)));
12288                         return libbpf_err(-EINVAL);
12289                 }
12290
12291                 len = btf_vlen(map_type);
12292                 var = btf_var_secinfos(map_type);
12293                 for (i = 0; i < len; i++, var++) {
12294                         var_type = btf__type_by_id(btf, var->type);
12295                         var_name = btf__name_by_offset(btf, var_type->name_off);
12296                         if (strcmp(var_name, var_skel->name) == 0) {
12297                                 *var_skel->addr = map->mmaped + var->offset;
12298                                 break;
12299                         }
12300                 }
12301         }
12302         return 0;
12303 }
12304
12305 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
12306 {
12307         if (!s)
12308                 return;
12309         free(s->maps);
12310         free(s->progs);
12311         free(s->vars);
12312         free(s);
12313 }
12314
12315 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
12316 {
12317         int i, err;
12318
12319         err = bpf_object__load(*s->obj);
12320         if (err) {
12321                 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
12322                 return libbpf_err(err);
12323         }
12324
12325         for (i = 0; i < s->map_cnt; i++) {
12326                 struct bpf_map *map = *s->maps[i].map;
12327                 size_t mmap_sz = bpf_map_mmap_sz(map);
12328                 int prot, map_fd = bpf_map__fd(map);
12329                 void **mmaped = s->maps[i].mmaped;
12330
12331                 if (!mmaped)
12332                         continue;
12333
12334                 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
12335                         *mmaped = NULL;
12336                         continue;
12337                 }
12338
12339                 if (map->def.map_flags & BPF_F_RDONLY_PROG)
12340                         prot = PROT_READ;
12341                 else
12342                         prot = PROT_READ | PROT_WRITE;
12343
12344                 /* Remap the anonymous mmap()-ed "map initialization image"
12345                  * as BPF map-backed mmap()-ed memory, preserving the same
12346                  * memory address. This causes the kernel to change the
12347                  * process' page table to point to a different piece of
12348                  * kernel memory, but from the userspace point of view the
12349                  * address (and its contents, identical at this point) stays
12350                  * the same. This mapping is released by bpf_object__close()
12351                  * as part of the normal clean up procedure, so we don't
12352                  * need to worry about it from the skeleton's perspective.
12353                  */
12354                 *mmaped = mmap(map->mmaped, mmap_sz, prot,
12355                                 MAP_SHARED | MAP_FIXED, map_fd, 0);
12356                 if (*mmaped == MAP_FAILED) {
12357                         err = -errno;
12358                         *mmaped = NULL;
12359                         pr_warn("failed to re-mmap() map '%s': %d\n",
12360                                  bpf_map__name(map), err);
12361                         return libbpf_err(err);
12362                 }
12363         }
12364
12365         return 0;
12366 }
12367
12368 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
12369 {
12370         int i, err;
12371
12372         for (i = 0; i < s->prog_cnt; i++) {
12373                 struct bpf_program *prog = *s->progs[i].prog;
12374                 struct bpf_link **link = s->progs[i].link;
12375
12376                 if (!prog->autoload || !prog->autoattach)
12377                         continue;
12378
12379                 /* auto-attaching not supported for this program */
12380                 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12381                         continue;
12382
12383                 /* if user already set the link manually, don't attempt auto-attach */
12384                 if (*link)
12385                         continue;
12386
12387                 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
12388                 if (err) {
12389                         pr_warn("prog '%s': failed to auto-attach: %d\n",
12390                                 bpf_program__name(prog), err);
12391                         return libbpf_err(err);
12392                 }
12393
12394                 /* It's possible that for some SEC() definitions auto-attach
12395                  * is supported in some cases (e.g., if the definition
12396                  * completely specifies the target), but not in others.
12397                  * SEC("uprobe") is one such case: if the user specified a
12398                  * target binary and function name, such a BPF program can
12399                  * be auto-attached; if not, it shouldn't cause the whole
12400                  * skeleton attach to fail and should just be skipped.
12401                  * attach_fn signals such a case by returning 0 (no error)
12402                  * and setting link to NULL.
12403                  */
12404         }
12405
12406         return 0;
12407 }
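/* Typical skeleton lifecycle (illustrative; the my_obj_bpf names come from
 * a hypothetical "bpftool gen skeleton my_obj.bpf.o" header, which wraps
 * the bpf_object__*_skeleton() helpers above):
 *
 *	struct my_obj_bpf *skel;
 *
 *	skel = my_obj_bpf__open_and_load();
 *	if (!skel)
 *		// handle open/load error
 *	if (my_obj_bpf__attach(skel))
 *		// handle attach error
 *	// run workload, poll maps/buffers, etc.
 *	my_obj_bpf__destroy(skel);
 */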
12408
12409 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
12410 {
12411         int i;
12412
12413         for (i = 0; i < s->prog_cnt; i++) {
12414                 struct bpf_link **link = s->progs[i].link;
12415
12416                 bpf_link__destroy(*link);
12417                 *link = NULL;
12418         }
12419 }
12420
12421 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
12422 {
12423         if (!s)
12424                 return;
12425
12426         if (s->progs)
12427                 bpf_object__detach_skeleton(s);
12428         if (s->obj)
12429                 bpf_object__close(*s->obj);
12430         free(s->maps);
12431         free(s->progs);
12432         free(s);
12433 }