// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "zip.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vsprintf() in __base_pr() uses a nonliteral format string, which can break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);

static const char * const attach_type_name[] = {
        [BPF_CGROUP_INET_INGRESS]       = "cgroup_inet_ingress",
        [BPF_CGROUP_INET_EGRESS]        = "cgroup_inet_egress",
        [BPF_CGROUP_INET_SOCK_CREATE]   = "cgroup_inet_sock_create",
        [BPF_CGROUP_INET_SOCK_RELEASE]  = "cgroup_inet_sock_release",
        [BPF_CGROUP_SOCK_OPS]           = "cgroup_sock_ops",
        [BPF_CGROUP_DEVICE]             = "cgroup_device",
        [BPF_CGROUP_INET4_BIND]         = "cgroup_inet4_bind",
        [BPF_CGROUP_INET6_BIND]         = "cgroup_inet6_bind",
        [BPF_CGROUP_INET4_CONNECT]      = "cgroup_inet4_connect",
        [BPF_CGROUP_INET6_CONNECT]      = "cgroup_inet6_connect",
        [BPF_CGROUP_INET4_POST_BIND]    = "cgroup_inet4_post_bind",
        [BPF_CGROUP_INET6_POST_BIND]    = "cgroup_inet6_post_bind",
        [BPF_CGROUP_INET4_GETPEERNAME]  = "cgroup_inet4_getpeername",
        [BPF_CGROUP_INET6_GETPEERNAME]  = "cgroup_inet6_getpeername",
        [BPF_CGROUP_INET4_GETSOCKNAME]  = "cgroup_inet4_getsockname",
        [BPF_CGROUP_INET6_GETSOCKNAME]  = "cgroup_inet6_getsockname",
        [BPF_CGROUP_UDP4_SENDMSG]       = "cgroup_udp4_sendmsg",
        [BPF_CGROUP_UDP6_SENDMSG]       = "cgroup_udp6_sendmsg",
        [BPF_CGROUP_SYSCTL]             = "cgroup_sysctl",
        [BPF_CGROUP_UDP4_RECVMSG]       = "cgroup_udp4_recvmsg",
        [BPF_CGROUP_UDP6_RECVMSG]       = "cgroup_udp6_recvmsg",
        [BPF_CGROUP_GETSOCKOPT]         = "cgroup_getsockopt",
        [BPF_CGROUP_SETSOCKOPT]         = "cgroup_setsockopt",
        [BPF_SK_SKB_STREAM_PARSER]      = "sk_skb_stream_parser",
        [BPF_SK_SKB_STREAM_VERDICT]     = "sk_skb_stream_verdict",
        [BPF_SK_SKB_VERDICT]            = "sk_skb_verdict",
        [BPF_SK_MSG_VERDICT]            = "sk_msg_verdict",
        [BPF_LIRC_MODE2]                = "lirc_mode2",
        [BPF_FLOW_DISSECTOR]            = "flow_dissector",
        [BPF_TRACE_RAW_TP]              = "trace_raw_tp",
        [BPF_TRACE_FENTRY]              = "trace_fentry",
        [BPF_TRACE_FEXIT]               = "trace_fexit",
        [BPF_MODIFY_RETURN]             = "modify_return",
        [BPF_LSM_MAC]                   = "lsm_mac",
        [BPF_LSM_CGROUP]                = "lsm_cgroup",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
        [BPF_TRACE_ITER]                = "trace_iter",
        [BPF_XDP_DEVMAP]                = "xdp_devmap",
        [BPF_XDP_CPUMAP]                = "xdp_cpumap",
        [BPF_XDP]                       = "xdp",
        [BPF_SK_REUSEPORT_SELECT]       = "sk_reuseport_select",
        [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_reuseport_select_or_migrate",
        [BPF_PERF_EVENT]                = "perf_event",
        [BPF_TRACE_KPROBE_MULTI]        = "trace_kprobe_multi",
        [BPF_STRUCT_OPS]                = "struct_ops",
        [BPF_NETFILTER]                 = "netfilter",
};

static const char * const link_type_name[] = {
        [BPF_LINK_TYPE_UNSPEC]                  = "unspec",
        [BPF_LINK_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_LINK_TYPE_TRACING]                 = "tracing",
        [BPF_LINK_TYPE_CGROUP]                  = "cgroup",
        [BPF_LINK_TYPE_ITER]                    = "iter",
        [BPF_LINK_TYPE_NETNS]                   = "netns",
        [BPF_LINK_TYPE_XDP]                     = "xdp",
        [BPF_LINK_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_LINK_TYPE_KPROBE_MULTI]            = "kprobe_multi",
        [BPF_LINK_TYPE_STRUCT_OPS]              = "struct_ops",
        [BPF_LINK_TYPE_NETFILTER]               = "netfilter",
};

static const char * const map_type_name[] = {
        [BPF_MAP_TYPE_UNSPEC]                   = "unspec",
        [BPF_MAP_TYPE_HASH]                     = "hash",
        [BPF_MAP_TYPE_ARRAY]                    = "array",
        [BPF_MAP_TYPE_PROG_ARRAY]               = "prog_array",
        [BPF_MAP_TYPE_PERF_EVENT_ARRAY]         = "perf_event_array",
        [BPF_MAP_TYPE_PERCPU_HASH]              = "percpu_hash",
        [BPF_MAP_TYPE_PERCPU_ARRAY]             = "percpu_array",
        [BPF_MAP_TYPE_STACK_TRACE]              = "stack_trace",
        [BPF_MAP_TYPE_CGROUP_ARRAY]             = "cgroup_array",
        [BPF_MAP_TYPE_LRU_HASH]                 = "lru_hash",
        [BPF_MAP_TYPE_LRU_PERCPU_HASH]          = "lru_percpu_hash",
        [BPF_MAP_TYPE_LPM_TRIE]                 = "lpm_trie",
        [BPF_MAP_TYPE_ARRAY_OF_MAPS]            = "array_of_maps",
        [BPF_MAP_TYPE_HASH_OF_MAPS]             = "hash_of_maps",
        [BPF_MAP_TYPE_DEVMAP]                   = "devmap",
        [BPF_MAP_TYPE_DEVMAP_HASH]              = "devmap_hash",
        [BPF_MAP_TYPE_SOCKMAP]                  = "sockmap",
        [BPF_MAP_TYPE_CPUMAP]                   = "cpumap",
        [BPF_MAP_TYPE_XSKMAP]                   = "xskmap",
        [BPF_MAP_TYPE_SOCKHASH]                 = "sockhash",
        [BPF_MAP_TYPE_CGROUP_STORAGE]           = "cgroup_storage",
        [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY]      = "reuseport_sockarray",
        [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]    = "percpu_cgroup_storage",
        [BPF_MAP_TYPE_QUEUE]                    = "queue",
        [BPF_MAP_TYPE_STACK]                    = "stack",
        [BPF_MAP_TYPE_SK_STORAGE]               = "sk_storage",
        [BPF_MAP_TYPE_STRUCT_OPS]               = "struct_ops",
        [BPF_MAP_TYPE_RINGBUF]                  = "ringbuf",
        [BPF_MAP_TYPE_INODE_STORAGE]            = "inode_storage",
        [BPF_MAP_TYPE_TASK_STORAGE]             = "task_storage",
        [BPF_MAP_TYPE_BLOOM_FILTER]             = "bloom_filter",
        [BPF_MAP_TYPE_USER_RINGBUF]             = "user_ringbuf",
        [BPF_MAP_TYPE_CGRP_STORAGE]             = "cgrp_storage",
};

static const char * const prog_type_name[] = {
        [BPF_PROG_TYPE_UNSPEC]                  = "unspec",
        [BPF_PROG_TYPE_SOCKET_FILTER]           = "socket_filter",
        [BPF_PROG_TYPE_KPROBE]                  = "kprobe",
        [BPF_PROG_TYPE_SCHED_CLS]               = "sched_cls",
        [BPF_PROG_TYPE_SCHED_ACT]               = "sched_act",
        [BPF_PROG_TYPE_TRACEPOINT]              = "tracepoint",
        [BPF_PROG_TYPE_XDP]                     = "xdp",
        [BPF_PROG_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_PROG_TYPE_CGROUP_SKB]              = "cgroup_skb",
        [BPF_PROG_TYPE_CGROUP_SOCK]             = "cgroup_sock",
        [BPF_PROG_TYPE_LWT_IN]                  = "lwt_in",
        [BPF_PROG_TYPE_LWT_OUT]                 = "lwt_out",
        [BPF_PROG_TYPE_LWT_XMIT]                = "lwt_xmit",
        [BPF_PROG_TYPE_SOCK_OPS]                = "sock_ops",
        [BPF_PROG_TYPE_SK_SKB]                  = "sk_skb",
        [BPF_PROG_TYPE_CGROUP_DEVICE]           = "cgroup_device",
        [BPF_PROG_TYPE_SK_MSG]                  = "sk_msg",
        [BPF_PROG_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_PROG_TYPE_CGROUP_SOCK_ADDR]        = "cgroup_sock_addr",
        [BPF_PROG_TYPE_LWT_SEG6LOCAL]           = "lwt_seg6local",
        [BPF_PROG_TYPE_LIRC_MODE2]              = "lirc_mode2",
        [BPF_PROG_TYPE_SK_REUSEPORT]            = "sk_reuseport",
        [BPF_PROG_TYPE_FLOW_DISSECTOR]          = "flow_dissector",
        [BPF_PROG_TYPE_CGROUP_SYSCTL]           = "cgroup_sysctl",
        [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
        [BPF_PROG_TYPE_CGROUP_SOCKOPT]          = "cgroup_sockopt",
        [BPF_PROG_TYPE_TRACING]                 = "tracing",
        [BPF_PROG_TYPE_STRUCT_OPS]              = "struct_ops",
        [BPF_PROG_TYPE_EXT]                     = "ext",
        [BPF_PROG_TYPE_LSM]                     = "lsm",
        [BPF_PROG_TYPE_SK_LOOKUP]               = "sk_lookup",
        [BPF_PROG_TYPE_SYSCALL]                 = "syscall",
        [BPF_PROG_TYPE_NETFILTER]               = "netfilter",
};

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn;

        old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);

        return old_print_fn;
}

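/* Usage sketch (not libbpf code; my_print() is a hypothetical callback shown
 * only for illustration): a caller can route libbpf's logs through its own
 * function, or silence them entirely:
 *
 *      static int my_print(enum libbpf_print_level level,
 *                          const char *fmt, va_list args)
 *      {
 *              if (level == LIBBPF_DEBUG)
 *                      return 0;
 *              return vfprintf(stderr, fmt, args);
 *      }
 *
 *      libbpf_set_print(my_print);     installs the custom callback
 *      libbpf_set_print(NULL);         suppresses all libbpf output
 */
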
__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;
        int old_errno;
        libbpf_print_fn_t print_fn;

        print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
        if (!print_fn)
                return;

        old_errno = errno;

        va_start(args, format);
        /* call the atomically loaded local copy, not __libbpf_pr itself, so
         * that a concurrent libbpf_set_print(NULL) cannot race with the
         * NULL check above
         */
        print_fn(level, format, args);
        va_end(args);

        errno = old_errno;
}

static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
        /* as of v1.0 libbpf_set_strict_mode() is a no-op */
        return 0;
}

__u32 libbpf_major_version(void)
{
        return LIBBPF_MAJOR_VERSION;
}

__u32 libbpf_minor_version(void)
{
        return LIBBPF_MINOR_VERSION;
}

const char *libbpf_version_string(void)
{
#define __S(X) #X
#define _S(X) __S(X)
        return  "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
#undef _S
#undef __S
}

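/* Illustration of the stringification above (using hypothetical values, say
 * LIBBPF_MAJOR_VERSION == 1 and LIBBPF_MINOR_VERSION == 2): the two-level
 * _S()/__S() expansion first substitutes the macro values and then
 * stringifies them, so the expression becomes "v" "1" "." "2", which the
 * compiler concatenates into "v1.2". A single-level #X would stringify the
 * macro name itself, yielding "LIBBPF_MAJOR_VERSION".
 */
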
enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN_LD64,
        RELO_EXTERN_CALL,
        RELO_SUBPROG_ADDR,
        RELO_CORE,
};

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        union {
                const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
                struct {
                        int map_idx;
                        int sym_off;
                        int ext_idx;
                };
        };
};

/* stored as sec_def->cookie for all libbpf-supported SEC()s */
enum sec_def_flags {
        SEC_NONE = 0,
        /* expected_attach_type is optional if the kernel doesn't support it */
        SEC_EXP_ATTACH_OPT = 1,
        /* legacy, only used by libbpf_get_type_names() and
         * libbpf_attach_type_by_name(), not used by libbpf itself at all.
         * This used to be associated with cgroup (and a few other) BPF
         * programs that were attachable through the BPF_PROG_ATTACH
         * command. Pretty meaningless nowadays, though.
         */
        SEC_ATTACHABLE = 2,
        SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
        /* attachment target is specified through a BTF ID in either the
         * kernel's or another BPF program's BTF object
         */
        SEC_ATTACH_BTF = 4,
        /* BPF program type allows sleeping/blocking in kernel */
        SEC_SLEEPABLE = 8,
        /* BPF program supports non-linear XDP buffers */
        SEC_XDP_FRAGS = 16,
};

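/* A few illustrative mappings (a sketch; the authoritative list is the
 * section definitions table later in this file): SEC("cgroup_skb/ingress")
 * programs are SEC_ATTACHABLE_OPT, SEC("fentry/...") needs SEC_ATTACH_BTF
 * to resolve its target BTF ID, SEC("fentry.s/...") additionally carries
 * SEC_SLEEPABLE, and SEC("xdp.frags") sets SEC_XDP_FRAGS to accept
 * multi-buffer XDP packets.
 */
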
struct bpf_sec_def {
        char *sec;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        long cookie;
        int handler_id;

        libbpf_prog_setup_fn_t prog_setup_fn;
        libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
        libbpf_prog_attach_fn_t prog_attach_fn;
};

/*
 * bpf_prog would be a better name, but it is already taken by
 * linux/filter.h.
 */
struct bpf_program {
        char *name;
        char *sec_name;
        size_t sec_idx;
        const struct bpf_sec_def *sec_def;
        /* this program's instruction offset (in number of instructions)
         * within its containing ELF section
         */
        size_t sec_insn_off;
        /* number of original instructions in the ELF section belonging to
         * this program, not counting the subprogram instructions possibly
         * appended later during relocation
         */
        size_t sec_insn_cnt;
        /* Offset (in number of instructions) of the start of instructions
         * belonging to this BPF program within its containing main BPF
         * program. For the entry-point (main) BPF program, this is always
         * zero. For a sub-program, this gets reset before each main BPF
         * program is processed and relocated, and is used to determine
         * whether the sub-program was already appended to the main program,
         * and if so, at which instruction offset.
         */
        size_t sub_insn_off;

        /* instructions that belong to this BPF program; insns[0] is located
         * at the sec_insn_off instruction within its ELF section in the ELF
         * file, so to map an ELF file instruction index to a local
         * instruction index, one needs to subtract sec_insn_off; and vice
         * versa.
         */
        struct bpf_insn *insns;
        /* actual number of instructions in this BPF program's image; for
         * entry-point BPF programs this includes the size of the main
         * program itself plus all the used sub-programs, appended at the end
         */
        size_t insns_cnt;

        struct reloc_desc *reloc_desc;
        int nr_reloc;

        /* BPF verifier log settings */
        char *log_buf;
        size_t log_size;
        __u32 log_level;

        struct bpf_object *obj;

        int fd;
        bool autoload;
        bool autoattach;
        bool mark_btf_static;
        enum bpf_prog_type type;
        enum bpf_attach_type expected_attach_type;

        int prog_ifindex;
        __u32 attach_btf_obj_fd;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;

        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};

struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops).
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

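/* Rough correspondence with the *_SEC names above: .data* sections become
 * LIBBPF_MAP_DATA maps, .bss becomes LIBBPF_MAP_BSS, .rodata* becomes
 * LIBBPF_MAP_RODATA, and Kconfig-backed externs land in LIBBPF_MAP_KCONFIG.
 * All of these internal maps are single-entry BPF array maps under the hood.
 */
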
struct bpf_map_def {
        unsigned int type;
        unsigned int key_size;
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
};

struct bpf_map {
        struct bpf_object *obj;
        char *name;
        /* real_name is defined for special internal maps (.rodata*,
         * .data*, .bss, .kconfig) and preserves their original ELF section
         * name. This is important to be able to find corresponding BTF
         * DATASEC information.
         */
        char *real_name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
        bool autocreate;
        __u64 map_extra;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_KCFG,
        EXT_KSYM,
};

enum kcfg_type {
        KCFG_UNKNOWN,
        KCFG_CHAR,
        KCFG_BOOL,
        KCFG_INT,
        KCFG_TRISTATE,
        KCFG_CHAR_ARR,
};

struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        bool is_set;
        bool is_weak;
        union {
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                struct {
                        unsigned long long addr;

                        /* target btf_id of the corresponding kernel var. */
                        int kernel_btf_obj_fd;
                        int kernel_btf_id;

                        /* local btf_id of the ksym extern's type. */
                        __u32 type_id;
                        /* BTF fd index to be patched in for insn->off, this is
                         * 0 for vmlinux BTF, index in obj->fd_array for module
                         * BTF
                         */
                        __s16 btf_fd_idx;
                } ksym;
        };
};

struct module_btf {
        struct btf *btf;
        char *name;
        __u32 id;
        int fd;
        int fd_array_idx;
};

enum sec_type {
        SEC_UNUSED = 0,
        SEC_RELO,
        SEC_BSS,
        SEC_DATA,
        SEC_RODATA,
};

struct elf_sec_desc {
        enum sec_type sec_type;
        Elf64_Shdr *shdr;
        Elf_Data *data;
};

struct elf_state {
        int fd;
        const void *obj_buf;
        size_t obj_buf_sz;
        Elf *elf;
        Elf64_Ehdr *ehdr;
        Elf_Data *symbols;
        Elf_Data *st_ops_data;
        Elf_Data *st_ops_link_data;
        size_t shstrndx; /* section index for section name strings */
        size_t strtabidx;
        struct elf_sec_desc *secs;
        size_t sec_cnt;
        int btf_maps_shndx;
        __u32 btf_maps_sec_btf_id;
        int text_shndx;
        int symbols_shndx;
        int st_ops_shndx;
        int st_ops_link_shndx;
};

struct usdt_manager;

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;

        bool loaded;
        bool has_subcalls;
        bool has_rodata;

        struct bpf_gen *gen_loader;

        /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
        struct elf_state efile;

        struct btf *btf;
        struct btf_ext *btf_ext;

        /* Parse and load BTF vmlinux if any of the programs in the object need
         * it at load time.
         */
        struct btf *btf_vmlinux;
        /* Path to the custom BTF to be used for BPF CO-RE relocations as an
         * override for vmlinux BTF.
         */
        char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
        struct module_btf *btf_modules;
        bool btf_modules_loaded;
        size_t btf_module_cnt;
        size_t btf_module_cap;

        /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
        char *log_buf;
        size_t log_size;
        __u32 log_level;

        int *fd_array;
        size_t fd_array_cap;
        size_t fd_array_cnt;

        struct usdt_manager *usdt_man;

        char path[];
};

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);

void bpf_program__unload(struct bpf_program *prog)
{
        if (!prog)
                return;

        zclose(prog->fd);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->sec_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->sec_idx = -1;
}

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
        return BPF_CLASS(insn->code) == BPF_JMP &&
               BPF_OP(insn->code) == BPF_CALL &&
               BPF_SRC(insn->code) == BPF_K &&
               insn->src_reg == BPF_PSEUDO_CALL &&
               insn->dst_reg == 0 &&
               insn->off == 0;
}

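/* For illustration (a sketch, not code used here): a BPF-to-BPF subprogram
 * call recognized above is encoded roughly as
 *
 *      struct bpf_insn call = {
 *              .code    = BPF_JMP | BPF_CALL,
 *              .src_reg = BPF_PSEUDO_CALL,
 *              .imm     = <insn-relative offset, filled in by relocation>,
 *      };
 *
 * while a helper call has src_reg == 0 and imm set to the helper ID; in
 * both cases dst_reg and off are zero at this stage.
 */
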
static bool is_call_insn(const struct bpf_insn *insn)
{
        return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
        return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                      const char *name, size_t sec_idx, const char *sec_name,
                      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
        if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
                pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
                        sec_name, name, sec_off, insn_data_sz);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));
        prog->obj = obj;

        prog->sec_idx = sec_idx;
        prog->sec_insn_off = sec_off / BPF_INSN_SZ;
        prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
        /* insns_cnt can later be increased by appending used subprograms */
        prog->insns_cnt = prog->sec_insn_cnt;

        prog->type = BPF_PROG_TYPE_UNSPEC;
        prog->fd = -1;

        /* libbpf's convention for SEC("?abc...") is that it's just like
         * SEC("abc...") but the corresponding bpf_program starts out with
         * autoload set to false.
         */
        if (sec_name[0] == '?') {
                prog->autoload = false;
                /* from now on, forget there was a '?' in the section name */
                sec_name++;
        } else {
                prog->autoload = true;
        }

        prog->autoattach = true;

        /* inherit object's log_level */
        prog->log_level = obj->log_level;

        prog->sec_name = strdup(sec_name);
        if (!prog->sec_name)
                goto errout;

        prog->name = strdup(name);
        if (!prog->name)
                goto errout;

        prog->insns = malloc(insn_data_sz);
        if (!prog->insns)
                goto errout;
        memcpy(prog->insns, insn_data, insn_data_sz);

        return 0;
errout:
        pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
        bpf_program__exit(prog);
        return -ENOMEM;
}

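/* Usage sketch for the '?' convention handled above (the section name is
 * hypothetical; bpf_program__set_autoload() is real libbpf API): a program
 * declared as
 *
 *      SEC("?kprobe/do_sys_openat2")
 *      int handle_open(struct pt_regs *ctx) { ... }
 *
 * is parsed normally but starts with autoload == false, so it is skipped at
 * load time unless the caller opts in with
 * bpf_program__set_autoload(prog, true) before bpf_object__load().
 */
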
static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
                         const char *sec_name, int sec_idx)
{
        Elf_Data *symbols = obj->efile.symbols;
        struct bpf_program *prog, *progs;
        void *data = sec_data->d_buf;
        size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
        int nr_progs, err, i;
        const char *name;
        Elf64_Sym *sym;

        progs = obj->programs;
        nr_progs = obj->nr_programs;
        nr_syms = symbols->d_size / sizeof(Elf64_Sym);

        for (i = 0; i < nr_syms; i++) {
                sym = elf_sym_by_idx(obj, i);

                if (sym->st_shndx != sec_idx)
                        continue;
                if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
                        continue;

                prog_sz = sym->st_size;
                sec_off = sym->st_value;

                name = elf_sym_str(obj, sym->st_name);
                if (!name) {
                        pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_off + prog_sz > sec_sz) {
                        pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
                        pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
                        return -ENOTSUP;
                }

                pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
                         sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

                progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
                if (!progs) {
                        /*
                         * In this case the original obj->programs is still
                         * valid, so no special treatment is needed in
                         * bpf_object__close().
                         */
                        pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
                                sec_name, name);
                        return -ENOMEM;
                }
                obj->programs = progs;

                prog = &progs[nr_progs];

                err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
                                            sec_off, data + sec_off, prog_sz);
                if (err)
                        return err;

                /* if the function is a global/weak symbol but has restricted
                 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
                 * as static to enable a more permissive BPF verification
                 * mode, with more outside context available to the BPF
                 * verifier
                 */
                if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL
                    && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
                        || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
                        prog->mark_btf_static = true;

                nr_progs++;
                obj->nr_programs = nr_progs;
        }

        return 0;
}

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (btf_member_bit_offset(t, i) == bit_offset)
                        return m;
        }

        return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
                    const char *name)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
                        return m;
        }

        return NULL;
}

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(const struct btf *btf, const char *tname,
                           const struct btf_type **type, __u32 *type_id,
                           const struct btf_type **vtype, __u32 *vtype_id,
                           const struct btf_member **data_member)
{
        const struct btf_type *kern_type, *kern_vtype;
        const struct btf_member *kern_data_member;
        __s32 kern_vtype_id, kern_type_id;
        __u32 i;

        kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
        if (kern_type_id < 0) {
                pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
                        tname);
                return kern_type_id;
        }
        kern_type = btf__type_by_id(btf, kern_type_id);

        /* Find the corresponding "map_value" type that will be used
         * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
         * find "struct bpf_struct_ops_tcp_congestion_ops" from the
         * btf_vmlinux.
         */
        kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
                                                tname, BTF_KIND_STRUCT);
        if (kern_vtype_id < 0) {
                pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
                        STRUCT_OPS_VALUE_PREFIX, tname);
                return kern_vtype_id;
        }
        kern_vtype = btf__type_by_id(btf, kern_vtype_id);

        /* Find "struct tcp_congestion_ops" from
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [ ... ]
         *      struct tcp_congestion_ops data;
         * }
         */
        kern_data_member = btf_members(kern_vtype);
        for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
                if (kern_data_member->type == kern_type_id)
                        break;
        }
        if (i == btf_vlen(kern_vtype)) {
                pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
                        tname, STRUCT_OPS_VALUE_PREFIX, tname);
                return -EINVAL;
        }

        *type = kern_type;
        *type_id = kern_type_id;
        *vtype = kern_vtype;
        *vtype_id = kern_vtype_id;
        *data_member = kern_data_member;

        return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
        return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
                                         const struct btf *btf,
                                         const struct btf *kern_btf)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_struct_ops *st_ops;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(kern_btf, tname,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        kern_data_off = kern_data_member->offset / 8;
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        prog = st_ops->progs[i];
                        if (!prog)
                                continue;

                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);

                        /* mtype->type must be a func_proto which was
                         * guaranteed in bpf_object__collect_st_ops_relos(),
                         * so only check kern_mtype for func_proto here.
                         */
                        if (!btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        size_t i;
        int err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!bpf_map__is_struct_ops(map))
                        continue;

                err = bpf_map__init_kern_struct_ops(map, obj->btf,
                                                    obj->btf_vmlinux);
                if (err)
                        return err;
        }

        return 0;
}

static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
                                int shndx, Elf_Data *data, __u32 map_flags)
{
        const struct btf_type *type, *datasec;
        const struct btf_var_secinfo *vsi;
        struct bpf_struct_ops *st_ops;
        const char *tname, *var_name;
        __s32 type_id, datasec_id;
        const struct btf *btf;
        struct bpf_map *map;
        __u32 i;

        if (shndx == -1)
                return 0;

        btf = obj->btf;
        datasec_id = btf__find_by_name_kind(btf, sec_name,
                                            BTF_KIND_DATASEC);
        if (datasec_id < 0) {
                pr_warn("struct_ops init: DATASEC %s not found\n",
                        sec_name);
                return -EINVAL;
        }

        datasec = btf__type_by_id(btf, datasec_id);
        vsi = btf_var_secinfos(datasec);
        for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
                type = btf__type_by_id(obj->btf, vsi->type);
                var_name = btf__name_by_offset(obj->btf, type->name_off);

                type_id = btf__resolve_type(obj->btf, vsi->type);
                if (type_id < 0) {
                        pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
                                vsi->type, sec_name);
                        return -EINVAL;
                }

                type = btf__type_by_id(obj->btf, type_id);
                tname = btf__name_by_offset(obj->btf, type->name_off);
                if (!tname[0]) {
                        pr_warn("struct_ops init: anonymous type is not supported\n");
                        return -ENOTSUP;
                }
                if (!btf_is_struct(type)) {
                        pr_warn("struct_ops init: %s is not a struct\n", tname);
                        return -EINVAL;
                }

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map->sec_idx = shndx;
                map->sec_offset = vsi->offset;
                map->name = strdup(var_name);
                if (!map->name)
                        return -ENOMEM;

                map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
                map->def.key_size = sizeof(int);
                map->def.value_size = type->size;
                map->def.max_entries = 1;
                map->def.map_flags = map_flags;

                map->st_ops = calloc(1, sizeof(*map->st_ops));
                if (!map->st_ops)
                        return -ENOMEM;
                st_ops = map->st_ops;
                st_ops->data = malloc(type->size);
                st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
                st_ops->kern_func_off = malloc(btf_vlen(type) *
                                               sizeof(*st_ops->kern_func_off));
                if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
                        return -ENOMEM;

                if (vsi->offset + type->size > data->d_size) {
                        pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
                                var_name, sec_name);
                        return -EINVAL;
                }

                memcpy(st_ops->data,
                       data->d_buf + vsi->offset,
                       type->size);
                st_ops->tname = tname;
                st_ops->type = type;
                st_ops->type_id = type_id;

                pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
                         tname, type_id, var_name, vsi->offset);
        }

        return 0;
}

static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
        int err;

        err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
                                   obj->efile.st_ops_data, 0);
        err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
                                          obj->efile.st_ops_link_shndx,
                                          obj->efile.st_ops_link_data,
                                          BPF_F_LINK);
        return err;
}

static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
        } else {
                /* use the GNU version of basename(), which doesn't modify its argument */
                libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection to return obj_buf
         * to the user. Otherwise the buffer would have to be duplicated, to
         * avoid the user freeing it before ELF processing is finished.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.st_ops_shndx = -1;
        obj->efile.st_ops_link_shndx = -1;
        obj->kconfig_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
        if (!obj->efile.elf)
                return;

        elf_end(obj->efile.elf);
        obj->efile.elf = NULL;
        obj->efile.symbols = NULL;
        obj->efile.st_ops_data = NULL;
        obj->efile.st_ops_link_data = NULL;

        zfree(&obj->efile.secs);
        obj->efile.sec_cnt = 0;
        zclose(obj->efile.fd);
        obj->efile.obj_buf = NULL;
        obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
        Elf64_Ehdr *ehdr;
        int err = 0;
        Elf *elf;

        if (obj->efile.elf) {
                pr_warn("elf: init internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }

        if (obj->efile.obj_buf_sz > 0) {
                /* obj_buf should have been validated by bpf_object__open_mem(). */
                elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
                if (obj->efile.fd < 0) {
                        char errmsg[STRERR_BUFSIZE], *cp;

                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
                        return err;
                }

                elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
        }

        if (!elf) {
                pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }

        obj->efile.elf = elf;

        if (elf_kind(elf) != ELF_K_ELF) {
                err = -LIBBPF_ERRNO__FORMAT;
                pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
                goto errout;
        }

        if (gelf_getclass(elf) != ELFCLASS64) {
                err = -LIBBPF_ERRNO__FORMAT;
                pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
                goto errout;
        }

        obj->efile.ehdr = ehdr = elf64_getehdr(elf);
        if (!obj->efile.ehdr) {
                pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
                pr_warn("elf: failed to get section names section index for %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* ELF is corrupted/truncated, avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
                pr_warn("elf: failed to get section names strings from %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Old LLVM set e_machine to EM_NONE */
        if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
                pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        return 0;
errout:
        bpf_object__elf_finish(obj);
        return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
                return 0;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
                return 0;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
        pr_warn("elf: endianness mismatch in %s.\n", obj->path);
        return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
        if (!data) {
                pr_warn("invalid license section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        /* libbpf_strlcpy() only copies the first N - 1 bytes, so size + 1
         * won't read past the allowed ELF data section buffer
         */
1411         libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1412         pr_debug("license of %s is %s\n", obj->path, obj->license);
1413         return 0;
1414 }
1415
1416 static int
1417 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1418 {
1419         __u32 kver;
1420
1421         if (!data || size != sizeof(kver)) {
1422                 pr_warn("invalid kver section in %s\n", obj->path);
1423                 return -LIBBPF_ERRNO__FORMAT;
1424         }
1425         memcpy(&kver, data, sizeof(kver));
1426         obj->kern_version = kver;
1427         pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1428         return 0;
1429 }
1430
1431 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
1432 {
1433         if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1434             type == BPF_MAP_TYPE_HASH_OF_MAPS)
1435                 return true;
1436         return false;
1437 }
1438
1439 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1440 {
1441         Elf_Data *data;
1442         Elf_Scn *scn;
1443
1444         if (!name)
1445                 return -EINVAL;
1446
1447         scn = elf_sec_by_name(obj, name);
1448         data = elf_sec_data(obj, scn);
1449         if (data) {
1450                 *size = data->d_size;
1451                 return 0; /* found it */
1452         }
1453
1454         return -ENOENT;
1455 }
1456
1457 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
1458 {
1459         Elf_Data *symbols = obj->efile.symbols;
1460         const char *sname;
1461         size_t si;
1462
1463         for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1464                 Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1465
1466                 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1467                         continue;
1468
1469                 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1470                     ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1471                         continue;
1472
1473                 sname = elf_sym_str(obj, sym->st_name);
1474                 if (!sname) {
1475                         pr_warn("failed to get sym name string for var %s\n", name);
1476                         return ERR_PTR(-EIO);
1477                 }
1478                 if (strcmp(name, sname) == 0)
1479                         return sym;
1480         }
1481
1482         return ERR_PTR(-ENOENT);
1483 }
1484
1485 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1486 {
1487         struct bpf_map *map;
1488         int err;
1489
1490         err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1491                                 sizeof(*obj->maps), obj->nr_maps + 1);
1492         if (err)
1493                 return ERR_PTR(err);
1494
1495         map = &obj->maps[obj->nr_maps++];
1496         map->obj = obj;
1497         map->fd = -1;
1498         map->inner_map_fd = -1;
1499         map->autocreate = true;
1500
1501         return map;
1502 }
1503
1504 static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
1505 {
1506         const long page_sz = sysconf(_SC_PAGE_SIZE);
1507         size_t map_sz;
1508
1509         map_sz = (size_t)roundup(value_sz, 8) * max_entries;
1510         map_sz = roundup(map_sz, page_sz);
1511         return map_sz;
1512 }
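/* Worked example, assuming 4KB pages: value_sz = 12, max_entries = 1000
 * gives roundup(12, 8) * 1000 = 16000 bytes, rounded up to 16384 (4 pages).
 */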
1513
1514 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
1515 {
1516         void *mmaped;
1517
1518         if (!map->mmaped)
1519                 return -EINVAL;
1520
1521         if (old_sz == new_sz)
1522                 return 0;
1523
1524         mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1525         if (mmaped == MAP_FAILED)
1526                 return -errno;
1527
1528         memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
1529         munmap(map->mmaped, old_sz);
1530         map->mmaped = mmaped;
1531         return 0;
1532 }
1533
1534 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1535 {
1536         char map_name[BPF_OBJ_NAME_LEN], *p;
1537         int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1538
1539         /* This is one of the more confusing parts of libbpf for various
1540          * reasons, some of which are historical. The original idea for naming
1541          * internal names was to include as much of BPF object name prefix as
1542          * possible, so that it can be distinguished from similar internal
1543          * maps of a different BPF object.
1544          * As an example, let's say we have bpf_object named 'my_object_name'
1545          * and internal map corresponding to '.rodata' ELF section. The final
1546          * map name advertised to user and to the kernel will be
1547          * 'my_objec.rodata', taking first 8 characters of object name and
1548          * entire 7 characters of '.rodata'.
1549          * Somewhat confusingly, if internal map ELF section name is shorter
1550          * than 7 characters, e.g., '.bss', we still reserve 7 characters
1551          * for the suffix, even though we only have 4 actual characters, and
1552          * resulting map will be called 'my_objec.bss', not even using all 15
1553          * characters allowed by the kernel. Oh well, at least the truncated
1554          * object name is somewhat consistent in this case. But if the map
1555          * name is '.kconfig', we'll still have entirety of '.kconfig' added
1556          * (8 chars) and thus will be left with only first 7 characters of the
1557          * object name ('my_obje'). Happy guessing, user, that the final map
1558          * name will be "my_obje.kconfig".
1559          * Now, with libbpf starting to support arbitrarily named .rodata.*
1560          * and .data.* data sections, it's possible that ELF section name is
1561          * longer than allowed 15 chars, so we now need to be careful to take
1562          * only up to 15 first characters of ELF name, taking no BPF object
1563          * name characters at all. So '.rodata.abracadabra' will result in
1564          * '.rodata.abracad' kernel and user-visible name.
1565          * We need to keep this convoluted logic intact for .data, .bss and
1566          * .rodata maps, but for new custom .data.custom and .rodata.custom
1567          * maps we use their ELF names as is, not prepending bpf_object name
1568          * in front. We still need to truncate them to 15 characters for the
1569          * kernel. Full name can be recovered for such maps by using DATASEC
1570          * BTF type associated with such map's value type, though.
1571          */
1572         if (sfx_len >= BPF_OBJ_NAME_LEN)
1573                 sfx_len = BPF_OBJ_NAME_LEN - 1;
1574
1575         /* if there are two or more dots in map name, it's a custom dot map */
1576         if (strchr(real_name + 1, '.') != NULL)
1577                 pfx_len = 0;
1578         else
1579                 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1580
1581         snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1582                  sfx_len, real_name);
1583
1584         /* sanitise map name to characters allowed by kernel */
1585         for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1586                 if (!isalnum(*p) && *p != '_' && *p != '.')
1587                         *p = '_';
1588
1589         return strdup(map_name);
1590 }
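/* Quick reference for the naming rules above, assuming a bpf_object named
 * "my_object_name":
 *
 *     ".rodata"             -> "my_objec.rodata"
 *     ".bss"                -> "my_objec.bss"
 *     ".kconfig"            -> "my_obje.kconfig"
 *     ".rodata.abracadabra" -> ".rodata.abracad"  (custom dot map, no prefix)
 */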
1591
1592 static int
1593 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1594
1595 /* An internal BPF map is mmap()'able only if at least one of the
1596  * corresponding DATASEC's VARs is to be exposed through the BPF skeleton.
1597  * I.e., it's a GLOBAL variable that is not marked as __hidden (which
1598  * effectively turns it into a STATIC variable).
1599  */
1600 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1601 {
1602         const struct btf_type *t, *vt;
1603         struct btf_var_secinfo *vsi;
1604         int i, n;
1605
1606         if (!map->btf_value_type_id)
1607                 return false;
1608
1609         t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1610         if (!btf_is_datasec(t))
1611                 return false;
1612
1613         vsi = btf_var_secinfos(t);
1614         for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1615                 vt = btf__type_by_id(obj->btf, vsi->type);
1616                 if (!btf_is_var(vt))
1617                         continue;
1618
1619                 if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1620                         return true;
1621         }
1622
1623         return false;
1624 }
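/* Illustrative sketch with hypothetical BPF program variables:
 *
 *     int exposed = 1;         // GLOBAL VAR: makes its .data map mmap()'able
 *     static int private_cnt;  // STATIC VAR: ignored by the check above
 *     __hidden int internal;   // GLOBAL but __hidden: BTF linkage is STATIC
 */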
1625
1626 static int
1627 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1628                               const char *real_name, int sec_idx, void *data, size_t data_sz)
1629 {
1630         struct bpf_map_def *def;
1631         struct bpf_map *map;
1632         size_t mmap_sz;
1633         int err;
1634
1635         map = bpf_object__add_map(obj);
1636         if (IS_ERR(map))
1637                 return PTR_ERR(map);
1638
1639         map->libbpf_type = type;
1640         map->sec_idx = sec_idx;
1641         map->sec_offset = 0;
1642         map->real_name = strdup(real_name);
1643         map->name = internal_map_name(obj, real_name);
1644         if (!map->real_name || !map->name) {
1645                 zfree(&map->real_name);
1646                 zfree(&map->name);
1647                 return -ENOMEM;
1648         }
1649
1650         def = &map->def;
1651         def->type = BPF_MAP_TYPE_ARRAY;
1652         def->key_size = sizeof(int);
1653         def->value_size = data_sz;
1654         def->max_entries = 1;
1655         def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1656                          ? BPF_F_RDONLY_PROG : 0;
1657
1658         /* failures are fine because of maps like .rodata.str1.1 */
1659         (void) map_fill_btf_type_info(obj, map);
1660
1661         if (map_is_mmapable(obj, map))
1662                 def->map_flags |= BPF_F_MMAPABLE;
1663
1664         pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1665                  map->name, map->sec_idx, map->sec_offset, def->map_flags);
1666
1667         mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
1668         map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
1669                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1670         if (map->mmaped == MAP_FAILED) {
1671                 err = -errno;
1672                 map->mmaped = NULL;
1673                 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1674                         map->name, err);
1675                 zfree(&map->real_name);
1676                 zfree(&map->name);
1677                 return err;
1678         }
1679
1680         if (data)
1681                 memcpy(map->mmaped, data, data_sz);
1682
1683         pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1684         return 0;
1685 }
1686
1687 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1688 {
1689         struct elf_sec_desc *sec_desc;
1690         const char *sec_name;
1691         int err = 0, sec_idx;
1692
1693         /*
1694          * Populate obj->maps with libbpf internal maps.
1695          */
1696         for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1697                 sec_desc = &obj->efile.secs[sec_idx];
1698
1699                 /* Skip recognized sections with size 0. */
1700                 if (!sec_desc->data || sec_desc->data->d_size == 0)
1701                         continue;
1702
1703                 switch (sec_desc->sec_type) {
1704                 case SEC_DATA:
1705                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1706                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1707                                                             sec_name, sec_idx,
1708                                                             sec_desc->data->d_buf,
1709                                                             sec_desc->data->d_size);
1710                         break;
1711                 case SEC_RODATA:
1712                         obj->has_rodata = true;
1713                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1714                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1715                                                             sec_name, sec_idx,
1716                                                             sec_desc->data->d_buf,
1717                                                             sec_desc->data->d_size);
1718                         break;
1719                 case SEC_BSS:
1720                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1721                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1722                                                             sec_name, sec_idx,
1723                                                             NULL,
1724                                                             sec_desc->data->d_size);
1725                         break;
1726                 default:
1727                         /* skip */
1728                         break;
1729                 }
1730                 if (err)
1731                         return err;
1732         }
1733         return 0;
1734 }
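/* Illustrative sketch with hypothetical BPF program variables:
 *
 *     int cnt = 1;         // .data   -> LIBBPF_MAP_DATA
 *     const int max = 10;  // .rodata -> LIBBPF_MAP_RODATA
 *     int scratch;         // .bss    -> LIBBPF_MAP_BSS (data is NULL above,
 *                          //            so the map starts out zero-filled)
 *
 * The loop above creates one single-entry ARRAY map per non-empty section.
 */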
1735
1736
1737 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1738                                                const void *name)
1739 {
1740         int i;
1741
1742         for (i = 0; i < obj->nr_extern; i++) {
1743                 if (strcmp(obj->externs[i].name, name) == 0)
1744                         return &obj->externs[i];
1745         }
1746         return NULL;
1747 }
1748
1749 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1750                               char value)
1751 {
1752         switch (ext->kcfg.type) {
1753         case KCFG_BOOL:
1754                 if (value == 'm') {
1755                         pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
1756                                 ext->name, value);
1757                         return -EINVAL;
1758                 }
1759                 *(bool *)ext_val = value == 'y';
1760                 break;
1761         case KCFG_TRISTATE:
1762                 if (value == 'y')
1763                         *(enum libbpf_tristate *)ext_val = TRI_YES;
1764                 else if (value == 'm')
1765                         *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1766                 else /* value == 'n' */
1767                         *(enum libbpf_tristate *)ext_val = TRI_NO;
1768                 break;
1769         case KCFG_CHAR:
1770                 *(char *)ext_val = value;
1771                 break;
1772         case KCFG_UNKNOWN:
1773         case KCFG_INT:
1774         case KCFG_CHAR_ARR:
1775         default:
1776                 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
1777                         ext->name, value);
1778                 return -EINVAL;
1779         }
1780         ext->is_set = true;
1781         return 0;
1782 }
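/* Illustrative sketch of externs served by this setter (any CONFIG_* name
 * from the kernel config works; __kconfig comes from bpf_helpers.h):
 *
 *     extern bool CONFIG_BPF_JIT __kconfig;                  // 'y'/'n'
 *     extern enum libbpf_tristate CONFIG_MODULES __kconfig;  // 'y'/'m'/'n'
 */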
1783
1784 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1785                               const char *value)
1786 {
1787         size_t len;
1788
1789         if (ext->kcfg.type != KCFG_CHAR_ARR) {
1790                 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
1791                         ext->name, value);
1792                 return -EINVAL;
1793         }
1794
1795         len = strlen(value);
1796         if (value[len - 1] != '"') {
1797                 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1798                         ext->name, value);
1799                 return -EINVAL;
1800         }
1801
1802         /* strip quotes */
1803         len -= 2;
1804         if (len >= ext->kcfg.sz) {
1805                 pr_warn("extern (kcfg) '%s': long string '%s' of (%zu bytes) truncated to %d bytes\n",
1806                         ext->name, value, len, ext->kcfg.sz - 1);
1807                 len = ext->kcfg.sz - 1;
1808         }
1809         memcpy(ext_val, value + 1, len);
1810         ext_val[len] = '\0';
1811         ext->is_set = true;
1812         return 0;
1813 }
1814
1815 static int parse_u64(const char *value, __u64 *res)
1816 {
1817         char *value_end;
1818         int err;
1819
1820         errno = 0;
1821         *res = strtoull(value, &value_end, 0);
1822         if (errno) {
1823                 err = -errno;
1824                 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1825                 return err;
1826         }
1827         if (*value_end) {
1828                 pr_warn("failed to parse '%s' as integer completely\n", value);
1829                 return -EINVAL;
1830         }
1831         return 0;
1832 }
1833
1834 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1835 {
1836         int bit_sz = ext->kcfg.sz * 8;
1837
1838         if (ext->kcfg.sz == 8)
1839                 return true;
1840
1841         /* Validate that a value stored in u64 fits in an integer of
1842          * `ext->kcfg.sz` bytes without loss of information. If the target integer
1843          * is signed, we rely on the following limits of integer type of
1844          * Y bits and subsequent transformation:
1845          *
1846          *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
1847          *            0 <= X + 2^(Y-1) <= 2^Y - 1
1848          *            0 <= X + 2^(Y-1) <  2^Y
1849          *
1850          *  For unsigned target integer, check that all the (64 - Y) bits are
1851          *  zero.
1852          */
1853         if (ext->kcfg.is_signed)
1854                 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1855         else
1856                 return (v >> bit_sz) == 0;
1857 }
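/* Worked example for a 1-byte signed extern (bit_sz = 8):
 *     v = (__u64)-128: v + 128 wraps to 0, and 0 < 256    -> in range
 *     v = 200:         200 + 128 = 328, not less than 256 -> out of range
 */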
1858
1859 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1860                               __u64 value)
1861 {
1862         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
1863             ext->kcfg.type != KCFG_BOOL) {
1864                 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
1865                         ext->name, (unsigned long long)value);
1866                 return -EINVAL;
1867         }
1868         if (ext->kcfg.type == KCFG_BOOL && value > 1) {
1869                 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
1870                         ext->name, (unsigned long long)value);
1871                 return -EINVAL;
1872
1873         }
1874         if (!is_kcfg_value_in_range(ext, value)) {
1875                 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
1876                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1877                 return -ERANGE;
1878         }
1879         switch (ext->kcfg.sz) {
1880         case 1:
1881                 *(__u8 *)ext_val = value;
1882                 break;
1883         case 2:
1884                 *(__u16 *)ext_val = value;
1885                 break;
1886         case 4:
1887                 *(__u32 *)ext_val = value;
1888                 break;
1889         case 8:
1890                 *(__u64 *)ext_val = value;
1891                 break;
1892         default:
1893                 return -EINVAL;
1894         }
1895         ext->is_set = true;
1896         return 0;
1897 }
1898
1899 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1900                                             char *buf, void *data)
1901 {
1902         struct extern_desc *ext;
1903         char *sep, *value;
1904         int len, err = 0;
1905         void *ext_val;
1906         __u64 num;
1907
1908         if (!str_has_pfx(buf, "CONFIG_"))
1909                 return 0;
1910
1911         sep = strchr(buf, '=');
1912         if (!sep) {
1913                 pr_warn("failed to parse '%s': no separator\n", buf);
1914                 return -EINVAL;
1915         }
1916
1917         /* Trim trailing '\n' */
1918         len = strlen(buf);
1919         if (buf[len - 1] == '\n')
1920                 buf[len - 1] = '\0';
1921         /* Split on '=' and ensure that a value is present. */
1922         *sep = '\0';
1923         if (!sep[1]) {
1924                 *sep = '=';
1925                 pr_warn("failed to parse '%s': no value\n", buf);
1926                 return -EINVAL;
1927         }
1928
1929         ext = find_extern_by_name(obj, buf);
1930         if (!ext || ext->is_set)
1931                 return 0;
1932
1933         ext_val = data + ext->kcfg.data_off;
1934         value = sep + 1;
1935
1936         switch (*value) {
1937         case 'y': case 'n': case 'm':
1938                 err = set_kcfg_value_tri(ext, ext_val, *value);
1939                 break;
1940         case '"':
1941                 err = set_kcfg_value_str(ext, ext_val, value);
1942                 break;
1943         default:
1944                 /* assume integer */
1945                 err = parse_u64(value, &num);
1946                 if (err) {
1947                         pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
1948                         return err;
1949                 }
1950                 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1951                         pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
1952                         return -EINVAL;
1953                 }
1954                 err = set_kcfg_value_num(ext, ext_val, num);
1955                 break;
1956         }
1957         if (err)
1958                 return err;
1959         pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
1960         return 0;
1961 }
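/* Example input lines and how they are dispatched above:
 *
 *     CONFIG_BPF=y                      -> set_kcfg_value_tri()
 *     CONFIG_HZ=250                     -> parse_u64() + set_kcfg_value_num()
 *     CONFIG_DEFAULT_HOSTNAME="(none)"  -> set_kcfg_value_str()
 *     # CONFIG_DEBUG_INFO is not set    -> skipped (no "CONFIG_" prefix)
 */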
1962
1963 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1964 {
1965         char buf[PATH_MAX];
1966         struct utsname uts;
1967         int len, err = 0;
1968         gzFile file;
1969
1970         uname(&uts);
1971         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1972         if (len < 0)
1973                 return -EINVAL;
1974         else if (len >= PATH_MAX)
1975                 return -ENAMETOOLONG;
1976
1977         /* gzopen also accepts uncompressed files. */
1978         file = gzopen(buf, "r");
1979         if (!file)
1980                 file = gzopen("/proc/config.gz", "r");
1981
1982         if (!file) {
1983                 pr_warn("failed to open system Kconfig\n");
1984                 return -ENOENT;
1985         }
1986
1987         while (gzgets(file, buf, sizeof(buf))) {
1988                 err = bpf_object__process_kconfig_line(obj, buf, data);
1989                 if (err) {
1990                         pr_warn("error parsing system Kconfig line '%s': %d\n",
1991                                 buf, err);
1992                         goto out;
1993                 }
1994         }
1995
1996 out:
1997         gzclose(file);
1998         return err;
1999 }
2000
2001 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
2002                                         const char *config, void *data)
2003 {
2004         char buf[PATH_MAX];
2005         int err = 0;
2006         FILE *file;
2007
2008         file = fmemopen((void *)config, strlen(config), "r");
2009         if (!file) {
2010                 err = -errno;
2011                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
2012                 return err;
2013         }
2014
2015         while (fgets(buf, sizeof(buf), file)) {
2016                 err = bpf_object__process_kconfig_line(obj, buf, data);
2017                 if (err) {
2018                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
2019                                 buf, err);
2020                         break;
2021                 }
2022         }
2023
2024         fclose(file);
2025         return err;
2026 }
2027
2028 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2029 {
2030         struct extern_desc *last_ext = NULL, *ext;
2031         size_t map_sz;
2032         int i, err;
2033
2034         for (i = 0; i < obj->nr_extern; i++) {
2035                 ext = &obj->externs[i];
2036                 if (ext->type == EXT_KCFG)
2037                         last_ext = ext;
2038         }
2039
2040         if (!last_ext)
2041                 return 0;
2042
2043         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2044         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2045                                             ".kconfig", obj->efile.symbols_shndx,
2046                                             NULL, map_sz);
2047         if (err)
2048                 return err;
2049
2050         obj->kconfig_map_idx = obj->nr_maps - 1;
2051
2052         return 0;
2053 }
2054
2055 const struct btf_type *
2056 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2057 {
2058         const struct btf_type *t = btf__type_by_id(btf, id);
2059
2060         if (res_id)
2061                 *res_id = id;
2062
2063         while (btf_is_mod(t) || btf_is_typedef(t)) {
2064                 if (res_id)
2065                         *res_id = t->type;
2066                 t = btf__type_by_id(btf, t->type);
2067         }
2068
2069         return t;
2070 }
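/* Example resolution chain: for a 'const volatile __u32' type this walks
 * CONST -> VOLATILE -> TYPEDEF(__u32) -> INT, returning the INT type and,
 * if requested, its ID in *res_id.
 */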
2071
2072 static const struct btf_type *
2073 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2074 {
2075         const struct btf_type *t;
2076
2077         t = skip_mods_and_typedefs(btf, id, NULL);
2078         if (!btf_is_ptr(t))
2079                 return NULL;
2080
2081         t = skip_mods_and_typedefs(btf, t->type, res_id);
2082
2083         return btf_is_func_proto(t) ? t : NULL;
2084 }
2085
2086 static const char *__btf_kind_str(__u16 kind)
2087 {
2088         switch (kind) {
2089         case BTF_KIND_UNKN: return "void";
2090         case BTF_KIND_INT: return "int";
2091         case BTF_KIND_PTR: return "ptr";
2092         case BTF_KIND_ARRAY: return "array";
2093         case BTF_KIND_STRUCT: return "struct";
2094         case BTF_KIND_UNION: return "union";
2095         case BTF_KIND_ENUM: return "enum";
2096         case BTF_KIND_FWD: return "fwd";
2097         case BTF_KIND_TYPEDEF: return "typedef";
2098         case BTF_KIND_VOLATILE: return "volatile";
2099         case BTF_KIND_CONST: return "const";
2100         case BTF_KIND_RESTRICT: return "restrict";
2101         case BTF_KIND_FUNC: return "func";
2102         case BTF_KIND_FUNC_PROTO: return "func_proto";
2103         case BTF_KIND_VAR: return "var";
2104         case BTF_KIND_DATASEC: return "datasec";
2105         case BTF_KIND_FLOAT: return "float";
2106         case BTF_KIND_DECL_TAG: return "decl_tag";
2107         case BTF_KIND_TYPE_TAG: return "type_tag";
2108         case BTF_KIND_ENUM64: return "enum64";
2109         default: return "unknown";
2110         }
2111 }
2112
2113 const char *btf_kind_str(const struct btf_type *t)
2114 {
2115         return __btf_kind_str(btf_kind(t));
2116 }
2117
2118 /*
2119  * Fetch an integer attribute of a BTF map definition. Such attributes are
2120  * represented using a pointer to an array whose dimensionality encodes the
2121  * specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY]; encodes
2122  * the `type => BPF_MAP_TYPE_ARRAY` key/value pair entirely within the BTF
2123  * type definition, while using only sizeof(void *) of ELF data section space.
2124  */
2125 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2126                               const struct btf_member *m, __u32 *res)
2127 {
2128         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2129         const char *name = btf__name_by_offset(btf, m->name_off);
2130         const struct btf_array *arr_info;
2131         const struct btf_type *arr_t;
2132
2133         if (!btf_is_ptr(t)) {
2134                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2135                         map_name, name, btf_kind_str(t));
2136                 return false;
2137         }
2138
2139         arr_t = btf__type_by_id(btf, t->type);
2140         if (!arr_t) {
2141                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2142                         map_name, name, t->type);
2143                 return false;
2144         }
2145         if (!btf_is_array(arr_t)) {
2146                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2147                         map_name, name, btf_kind_str(arr_t));
2148                 return false;
2149         }
2150         arr_info = btf_array(arr_t);
2151         *res = arr_info->nelems;
2152         return true;
2153 }
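/* In BPF program source this encoding is normally hidden behind the __uint()
 * macro from bpf_helpers.h: __uint(max_entries, 1024) expands to
 * 'int (*max_entries)[1024]', which is exactly what this helper decodes.
 */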
2154
2155 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2156 {
2157         int len;
2158
2159         len = snprintf(buf, buf_sz, "%s/%s", path, name);
2160         if (len < 0)
2161                 return -EINVAL;
2162         if (len >= buf_sz)
2163                 return -ENAMETOOLONG;
2164
2165         return 0;
2166 }
2167
2168 static int build_map_pin_path(struct bpf_map *map, const char *path)
2169 {
2170         char buf[PATH_MAX];
2171         int err;
2172
2173         if (!path)
2174                 path = "/sys/fs/bpf";
2175
2176         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2177         if (err)
2178                 return err;
2179
2180         return bpf_map__set_pin_path(map, buf);
2181 }
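/* Example: a map named "my_map" with no explicit pin_root_path gets the pin
 * path "/sys/fs/bpf/my_map".
 */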
2182
2183 /* should match definition in bpf_helpers.h */
2184 enum libbpf_pin_type {
2185         LIBBPF_PIN_NONE,
2186         /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2187         LIBBPF_PIN_BY_NAME,
2188 };
2189
2190 int parse_btf_map_def(const char *map_name, struct btf *btf,
2191                       const struct btf_type *def_t, bool strict,
2192                       struct btf_map_def *map_def, struct btf_map_def *inner_def)
2193 {
2194         const struct btf_type *t;
2195         const struct btf_member *m;
2196         bool is_inner = inner_def == NULL;
2197         int vlen, i;
2198
2199         vlen = btf_vlen(def_t);
2200         m = btf_members(def_t);
2201         for (i = 0; i < vlen; i++, m++) {
2202                 const char *name = btf__name_by_offset(btf, m->name_off);
2203
2204                 if (!name) {
2205                         pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2206                         return -EINVAL;
2207                 }
2208                 if (strcmp(name, "type") == 0) {
2209                         if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2210                                 return -EINVAL;
2211                         map_def->parts |= MAP_DEF_MAP_TYPE;
2212                 } else if (strcmp(name, "max_entries") == 0) {
2213                         if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2214                                 return -EINVAL;
2215                         map_def->parts |= MAP_DEF_MAX_ENTRIES;
2216                 } else if (strcmp(name, "map_flags") == 0) {
2217                         if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2218                                 return -EINVAL;
2219                         map_def->parts |= MAP_DEF_MAP_FLAGS;
2220                 } else if (strcmp(name, "numa_node") == 0) {
2221                         if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2222                                 return -EINVAL;
2223                         map_def->parts |= MAP_DEF_NUMA_NODE;
2224                 } else if (strcmp(name, "key_size") == 0) {
2225                         __u32 sz;
2226
2227                         if (!get_map_field_int(map_name, btf, m, &sz))
2228                                 return -EINVAL;
2229                         if (map_def->key_size && map_def->key_size != sz) {
2230                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
2231                                         map_name, map_def->key_size, sz);
2232                                 return -EINVAL;
2233                         }
2234                         map_def->key_size = sz;
2235                         map_def->parts |= MAP_DEF_KEY_SIZE;
2236                 } else if (strcmp(name, "key") == 0) {
2237                         __s64 sz;
2238
2239                         t = btf__type_by_id(btf, m->type);
2240                         if (!t) {
2241                                 pr_warn("map '%s': key type [%d] not found.\n",
2242                                         map_name, m->type);
2243                                 return -EINVAL;
2244                         }
2245                         if (!btf_is_ptr(t)) {
2246                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2247                                         map_name, btf_kind_str(t));
2248                                 return -EINVAL;
2249                         }
2250                         sz = btf__resolve_size(btf, t->type);
2251                         if (sz < 0) {
2252                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2253                                         map_name, t->type, (ssize_t)sz);
2254                                 return sz;
2255                         }
2256                         if (map_def->key_size && map_def->key_size != sz) {
2257                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2258                                         map_name, map_def->key_size, (ssize_t)sz);
2259                                 return -EINVAL;
2260                         }
2261                         map_def->key_size = sz;
2262                         map_def->key_type_id = t->type;
2263                         map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2264                 } else if (strcmp(name, "value_size") == 0) {
2265                         __u32 sz;
2266
2267                         if (!get_map_field_int(map_name, btf, m, &sz))
2268                                 return -EINVAL;
2269                         if (map_def->value_size && map_def->value_size != sz) {
2270                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2271                                         map_name, map_def->value_size, sz);
2272                                 return -EINVAL;
2273                         }
2274                         map_def->value_size = sz;
2275                         map_def->parts |= MAP_DEF_VALUE_SIZE;
2276                 } else if (strcmp(name, "value") == 0) {
2277                         __s64 sz;
2278
2279                         t = btf__type_by_id(btf, m->type);
2280                         if (!t) {
2281                                 pr_warn("map '%s': value type [%d] not found.\n",
2282                                         map_name, m->type);
2283                                 return -EINVAL;
2284                         }
2285                         if (!btf_is_ptr(t)) {
2286                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2287                                         map_name, btf_kind_str(t));
2288                                 return -EINVAL;
2289                         }
2290                         sz = btf__resolve_size(btf, t->type);
2291                         if (sz < 0) {
2292                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2293                                         map_name, t->type, (ssize_t)sz);
2294                                 return sz;
2295                         }
2296                         if (map_def->value_size && map_def->value_size != sz) {
2297                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2298                                         map_name, map_def->value_size, (ssize_t)sz);
2299                                 return -EINVAL;
2300                         }
2301                         map_def->value_size = sz;
2302                         map_def->value_type_id = t->type;
2303                         map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2304                 } else if (strcmp(name, "values") == 0) {
2306                         bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2307                         bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2308                         const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2309                         char inner_map_name[128];
2310                         int err;
2311
2312                         if (is_inner) {
2313                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2314                                         map_name);
2315                                 return -ENOTSUP;
2316                         }
2317                         if (i != vlen - 1) {
2318                                 pr_warn("map '%s': '%s' member should be last.\n",
2319                                         map_name, name);
2320                                 return -EINVAL;
2321                         }
2322                         if (!is_map_in_map && !is_prog_array) {
2323                                 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2324                                         map_name);
2325                                 return -ENOTSUP;
2326                         }
2327                         if (map_def->value_size && map_def->value_size != 4) {
2328                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2329                                         map_name, map_def->value_size);
2330                                 return -EINVAL;
2331                         }
2332                         map_def->value_size = 4;
2333                         t = btf__type_by_id(btf, m->type);
2334                         if (!t) {
2335                                 pr_warn("map '%s': %s type [%d] not found.\n",
2336                                         map_name, desc, m->type);
2337                                 return -EINVAL;
2338                         }
2339                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2340                                 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2341                                         map_name, desc);
2342                                 return -EINVAL;
2343                         }
2344                         t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2345                         if (!btf_is_ptr(t)) {
2346                                 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2347                                         map_name, desc, btf_kind_str(t));
2348                                 return -EINVAL;
2349                         }
2350                         t = skip_mods_and_typedefs(btf, t->type, NULL);
2351                         if (is_prog_array) {
2352                                 if (!btf_is_func_proto(t)) {
2353                                         pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2354                                                 map_name, btf_kind_str(t));
2355                                         return -EINVAL;
2356                                 }
2357                                 continue;
2358                         }
2359                         if (!btf_is_struct(t)) {
2360                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2361                                         map_name, btf_kind_str(t));
2362                                 return -EINVAL;
2363                         }
2364
2365                         snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2366                         err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2367                         if (err)
2368                                 return err;
2369
2370                         map_def->parts |= MAP_DEF_INNER_MAP;
2371                 } else if (strcmp(name, "pinning") == 0) {
2372                         __u32 val;
2373
2374                         if (is_inner) {
2375                                 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2376                                 return -EINVAL;
2377                         }
2378                         if (!get_map_field_int(map_name, btf, m, &val))
2379                                 return -EINVAL;
2380                         if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2381                                 pr_warn("map '%s': invalid pinning value %u.\n",
2382                                         map_name, val);
2383                                 return -EINVAL;
2384                         }
2385                         map_def->pinning = val;
2386                         map_def->parts |= MAP_DEF_PINNING;
2387                 } else if (strcmp(name, "map_extra") == 0) {
2388                         __u32 map_extra;
2389
2390                         if (!get_map_field_int(map_name, btf, m, &map_extra))
2391                                 return -EINVAL;
2392                         map_def->map_extra = map_extra;
2393                         map_def->parts |= MAP_DEF_MAP_EXTRA;
2394                 } else {
2395                         if (strict) {
2396                                 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2397                                 return -ENOTSUP;
2398                         }
2399                         pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2400                 }
2401         }
2402
2403         if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2404                 pr_warn("map '%s': map type isn't specified.\n", map_name);
2405                 return -EINVAL;
2406         }
2407
2408         return 0;
2409 }
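/* Illustrative sketch of a BTF-defined map this parser accepts (hypothetical
 * names; __uint/__type/__array come from bpf_helpers.h):
 *
 *     struct inner {
 *             __uint(type, BPF_MAP_TYPE_ARRAY);
 *             __uint(max_entries, 1);
 *             __type(key, __u32);
 *             __type(value, __u64);
 *     };
 *
 *     struct {
 *             __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
 *             __uint(max_entries, 16);
 *             __type(key, __u32);
 *             __uint(pinning, LIBBPF_PIN_BY_NAME);
 *             __array(values, struct inner);  // 'values' must be last
 *     } outer SEC(".maps");
 */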
2410
2411 static size_t adjust_ringbuf_sz(size_t sz)
2412 {
2413         __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2414         __u32 mul;
2415
2416         /* if user forgot to set any size, make sure they see error */
2417         if (sz == 0)
2418                 return 0;
2419         /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2420          * a power-of-2 multiple of kernel's page size. If user diligently
2421          * satisfied these conditions, pass the size through.
2422          */
2423         if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2424                 return sz;
2425
2426         /* Otherwise find closest (page_sz * power_of_2) product bigger than
2427          * user-set size to satisfy both user size request and kernel
2428          * requirements and substitute correct max_entries for map creation.
2429          */
2430         for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2431                 if (mul * page_sz > sz)
2432                         return mul * page_sz;
2433         }
2434
2435         /* if it's impossible to satisfy the conditions (i.e., user size is
2436          * very close to UINT_MAX but is not a power-of-2 multiple of
2437          * page_size) then just return original size and let kernel reject it
2438          */
2439         return sz;
2440 }
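/* Worked example, assuming 4KB pages: sz = 5000 is neither page-aligned nor
 * a power-of-2 multiple of the page size, so the loop returns the next such
 * product: 2 * 4096 = 8192.
 */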
2441
2442 static bool map_is_ringbuf(const struct bpf_map *map)
2443 {
2444         return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2445                map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2446 }
2447
2448 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2449 {
2450         map->def.type = def->map_type;
2451         map->def.key_size = def->key_size;
2452         map->def.value_size = def->value_size;
2453         map->def.max_entries = def->max_entries;
2454         map->def.map_flags = def->map_flags;
2455         map->map_extra = def->map_extra;
2456
2457         map->numa_node = def->numa_node;
2458         map->btf_key_type_id = def->key_type_id;
2459         map->btf_value_type_id = def->value_type_id;
2460
2461         /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2462         if (map_is_ringbuf(map))
2463                 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2464
2465         if (def->parts & MAP_DEF_MAP_TYPE)
2466                 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2467
2468         if (def->parts & MAP_DEF_KEY_TYPE)
2469                 pr_debug("map '%s': found key [%u], sz = %u.\n",
2470                          map->name, def->key_type_id, def->key_size);
2471         else if (def->parts & MAP_DEF_KEY_SIZE)
2472                 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2473
2474         if (def->parts & MAP_DEF_VALUE_TYPE)
2475                 pr_debug("map '%s': found value [%u], sz = %u.\n",
2476                          map->name, def->value_type_id, def->value_size);
2477         else if (def->parts & MAP_DEF_VALUE_SIZE)
2478                 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2479
2480         if (def->parts & MAP_DEF_MAX_ENTRIES)
2481                 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2482         if (def->parts & MAP_DEF_MAP_FLAGS)
2483                 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2484         if (def->parts & MAP_DEF_MAP_EXTRA)
2485                 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2486                          (unsigned long long)def->map_extra);
2487         if (def->parts & MAP_DEF_PINNING)
2488                 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2489         if (def->parts & MAP_DEF_NUMA_NODE)
2490                 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2491
2492         if (def->parts & MAP_DEF_INNER_MAP)
2493                 pr_debug("map '%s': found inner map definition.\n", map->name);
2494 }
2495
2496 static const char *btf_var_linkage_str(__u32 linkage)
2497 {
2498         switch (linkage) {
2499         case BTF_VAR_STATIC: return "static";
2500         case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2501         case BTF_VAR_GLOBAL_EXTERN: return "extern";
2502         default: return "unknown";
2503         }
2504 }
2505
2506 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2507                                          const struct btf_type *sec,
2508                                          int var_idx, int sec_idx,
2509                                          const Elf_Data *data, bool strict,
2510                                          const char *pin_root_path)
2511 {
2512         struct btf_map_def map_def = {}, inner_def = {};
2513         const struct btf_type *var, *def;
2514         const struct btf_var_secinfo *vi;
2515         const struct btf_var *var_extra;
2516         const char *map_name;
2517         struct bpf_map *map;
2518         int err;
2519
2520         vi = btf_var_secinfos(sec) + var_idx;
2521         var = btf__type_by_id(obj->btf, vi->type);
2522         var_extra = btf_var(var);
2523         map_name = btf__name_by_offset(obj->btf, var->name_off);
2524
2525         if (map_name == NULL || map_name[0] == '\0') {
2526                 pr_warn("map #%d: empty name.\n", var_idx);
2527                 return -EINVAL;
2528         }
2529         if ((__u64)vi->offset + vi->size > data->d_size) {
2530                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2531                 return -EINVAL;
2532         }
2533         if (!btf_is_var(var)) {
2534                 pr_warn("map '%s': unexpected var kind %s.\n",
2535                         map_name, btf_kind_str(var));
2536                 return -EINVAL;
2537         }
2538         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2539                 pr_warn("map '%s': unsupported map linkage %s.\n",
2540                         map_name, btf_var_linkage_str(var_extra->linkage));
2541                 return -EOPNOTSUPP;
2542         }
2543
2544         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2545         if (!btf_is_struct(def)) {
2546                 pr_warn("map '%s': unexpected def kind %s.\n",
2547                         map_name, btf_kind_str(def));
2548                 return -EINVAL;
2549         }
2550         if (def->size > vi->size) {
2551                 pr_warn("map '%s': invalid def size.\n", map_name);
2552                 return -EINVAL;
2553         }
2554
2555         map = bpf_object__add_map(obj);
2556         if (IS_ERR(map))
2557                 return PTR_ERR(map);
2558         map->name = strdup(map_name);
2559         if (!map->name) {
2560                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2561                 return -ENOMEM;
2562         }
2563         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2564         map->def.type = BPF_MAP_TYPE_UNSPEC;
2565         map->sec_idx = sec_idx;
2566         map->sec_offset = vi->offset;
2567         map->btf_var_idx = var_idx;
2568         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2569                  map_name, map->sec_idx, map->sec_offset);
2570
2571         err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2572         if (err)
2573                 return err;
2574
2575         fill_map_from_def(map, &map_def);
2576
2577         if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2578                 err = build_map_pin_path(map, pin_root_path);
2579                 if (err) {
2580                         pr_warn("map '%s': couldn't build pin path.\n", map->name);
2581                         return err;
2582                 }
2583         }
2584
2585         if (map_def.parts & MAP_DEF_INNER_MAP) {
2586                 map->inner_map = calloc(1, sizeof(*map->inner_map));
2587                 if (!map->inner_map)
2588                         return -ENOMEM;
2589                 map->inner_map->fd = -1;
2590                 map->inner_map->sec_idx = sec_idx;
2591                 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2592                 if (!map->inner_map->name)
2593                         return -ENOMEM;
2594                 sprintf(map->inner_map->name, "%s.inner", map_name);
2595
2596                 fill_map_from_def(map->inner_map, &inner_def);
2597         }
2598
2599         err = map_fill_btf_type_info(obj, map);
2600         if (err)
2601                 return err;
2602
2603         return 0;
2604 }
2605
2606 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2607                                           const char *pin_root_path)
2608 {
2609         const struct btf_type *sec = NULL;
2610         int nr_types, i, vlen, err;
2611         const struct btf_type *t;
2612         const char *name;
2613         Elf_Data *data;
2614         Elf_Scn *scn;
2615
2616         if (obj->efile.btf_maps_shndx < 0)
2617                 return 0;
2618
2619         scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2620         data = elf_sec_data(obj, scn);
2621         if (!scn || !data) {
2622                 pr_warn("elf: failed to get %s map definitions for %s\n",
2623                         MAPS_ELF_SEC, obj->path);
2624                 return -EINVAL;
2625         }
2626
2627         nr_types = btf__type_cnt(obj->btf);
2628         for (i = 1; i < nr_types; i++) {
2629                 t = btf__type_by_id(obj->btf, i);
2630                 if (!btf_is_datasec(t))
2631                         continue;
2632                 name = btf__name_by_offset(obj->btf, t->name_off);
2633                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2634                         sec = t;
2635                         obj->efile.btf_maps_sec_btf_id = i;
2636                         break;
2637                 }
2638         }
2639
2640         if (!sec) {
2641                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2642                 return -ENOENT;
2643         }
2644
2645         vlen = btf_vlen(sec);
2646         for (i = 0; i < vlen; i++) {
2647                 err = bpf_object__init_user_btf_map(obj, sec, i,
2648                                                     obj->efile.btf_maps_shndx,
2649                                                     data, strict,
2650                                                     pin_root_path);
2651                 if (err)
2652                         return err;
2653         }
2654
2655         return 0;
2656 }
2657
2658 static int bpf_object__init_maps(struct bpf_object *obj,
2659                                  const struct bpf_object_open_opts *opts)
2660 {
2661         const char *pin_root_path;
2662         bool strict;
2663         int err = 0;
2664
2665         strict = !OPTS_GET(opts, relaxed_maps, false);
2666         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2667
2668         err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2669         err = err ?: bpf_object__init_global_data_maps(obj);
2670         err = err ?: bpf_object__init_kconfig_map(obj);
2671         err = err ?: bpf_object_init_struct_ops(obj);
2672
2673         return err;
2674 }
2675
2676 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2677 {
2678         Elf64_Shdr *sh;
2679
2680         sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
2681         if (!sh)
2682                 return false;
2683
2684         return sh->sh_flags & SHF_EXECINSTR;
2685 }
2686
2687 static bool btf_needs_sanitization(struct bpf_object *obj)
2688 {
2689         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2690         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2691         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2692         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2693         bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2694         bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2695         bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2696
2697         return !has_func || !has_datasec || !has_func_global || !has_float ||
2698                !has_decl_tag || !has_type_tag || !has_enum64;
2699 }
2700
2701 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2702 {
2703         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2704         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2705         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2706         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2707         bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2708         bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2709         bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2710         int enum64_placeholder_id = 0;
2711         struct btf_type *t;
2712         int i, j, vlen;
2713
2714         for (i = 1; i < btf__type_cnt(btf); i++) {
2715                 t = (struct btf_type *)btf__type_by_id(btf, i);
2716
2717                 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
2718                         /* replace VAR/DECL_TAG with INT */
2719                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2720                         /*
2721                          * using size = 1 is the safest choice, 4 will be too
2722                          * big and cause kernel BTF validation failure if
2723                          * original variable took less than 4 bytes
2724                          */
2725                         t->size = 1;
2726                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2727                 } else if (!has_datasec && btf_is_datasec(t)) {
2728                         /* replace DATASEC with STRUCT */
2729                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
2730                         struct btf_member *m = btf_members(t);
2731                         struct btf_type *vt;
2732                         char *name;
2733
2734                         name = (char *)btf__name_by_offset(btf, t->name_off);
2735                         while (*name) {
2736                                 if (*name == '.')
2737                                         *name = '_';
2738                                 name++;
2739                         }
2740
2741                         vlen = btf_vlen(t);
2742                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2743                         for (j = 0; j < vlen; j++, v++, m++) {
2744                                 /* order of field assignments is important */
2745                                 m->offset = v->offset * 8;
2746                                 m->type = v->type;
2747                                 /* preserve variable name as member name */
2748                                 vt = (void *)btf__type_by_id(btf, v->type);
2749                                 m->name_off = vt->name_off;
2750                         }
2751                 } else if (!has_func && btf_is_func_proto(t)) {
2752                         /* replace FUNC_PROTO with ENUM */
2753                         vlen = btf_vlen(t);
2754                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2755                         t->size = sizeof(__u32); /* kernel enforced */
2756                 } else if (!has_func && btf_is_func(t)) {
2757                         /* replace FUNC with TYPEDEF */
2758                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2759                 } else if (!has_func_global && btf_is_func(t)) {
2760                         /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2761                         t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2762                 } else if (!has_float && btf_is_float(t)) {
2763                         /* replace FLOAT with an equally-sized empty STRUCT;
2764                          * since C compilers do not accept e.g. "float" as a
2765                          * valid struct name, make it anonymous
2766                          */
2767                         t->name_off = 0;
2768                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
2769                 } else if (!has_type_tag && btf_is_type_tag(t)) {
2770                         /* replace TYPE_TAG with a CONST */
2771                         t->name_off = 0;
2772                         t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
2773                 } else if (!has_enum64 && btf_is_enum(t)) {
2774                         /* clear the kflag */
2775                         t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
2776                 } else if (!has_enum64 && btf_is_enum64(t)) {
2777                         /* replace ENUM64 with a union */
2778                         struct btf_member *m;
2779
2780                         if (enum64_placeholder_id == 0) {
2781                                 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
2782                                 if (enum64_placeholder_id < 0)
2783                                         return enum64_placeholder_id;
2784
2785                                 t = (struct btf_type *)btf__type_by_id(btf, i);
2786                         }
2787
2788                         m = btf_members(t);
2789                         vlen = btf_vlen(t);
2790                         t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
2791                         for (j = 0; j < vlen; j++, m++) {
2792                                 m->type = enum64_placeholder_id;
2793                                 m->offset = 0;
2794                         }
2795                 }
2796         }
2797
2798         return 0;
2799 }
2800
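     /* Two related but distinct questions: libbpf_needs_btf() reports
      * whether libbpf itself can't proceed without object BTF (BTF-defined
      * maps, struct_ops, or externs are present), while kernel_needs_btf()
      * reports whether the kernel will insist on it (only struct_ops
      * requires that).
      */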
2801 static bool libbpf_needs_btf(const struct bpf_object *obj)
2802 {
2803         return obj->efile.btf_maps_shndx >= 0 ||
2804                obj->efile.st_ops_shndx >= 0 ||
2805                obj->efile.st_ops_link_shndx >= 0 ||
2806                obj->nr_extern > 0;
2807 }
2808
2809 static bool kernel_needs_btf(const struct bpf_object *obj)
2810 {
2811         return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0;
2812 }
2813
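     /* Parse the raw .BTF and (optionally) .BTF.ext ELF sections. On success
      * obj->btf (and obj->btf_ext) are initialized, pointer size is pinned to
      * 8 bytes for the BPF target, and each .BTF.ext func/line/CO-RE info
      * segment is mapped back to the ELF section index it describes. Missing
      * or broken BTF is an error only if libbpf_needs_btf() reports that BTF
      * is required.
      */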
2814 static int bpf_object__init_btf(struct bpf_object *obj,
2815                                 Elf_Data *btf_data,
2816                                 Elf_Data *btf_ext_data)
2817 {
2818         int err = -ENOENT;
2819
2820         if (btf_data) {
2821                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2822                 err = libbpf_get_error(obj->btf);
2823                 if (err) {
2824                         obj->btf = NULL;
2825                         pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
2826                         goto out;
2827                 }
2828                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2829                 btf__set_pointer_size(obj->btf, 8);
2830         }
2831         if (btf_ext_data) {
2832                 struct btf_ext_info *ext_segs[3];
2833                 int seg_num, sec_num;
2834
2835                 if (!obj->btf) {
2836                         pr_debug("Ignoring ELF section %s because the ELF section %s it depends on is missing.\n",
2837                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2838                         goto out;
2839                 }
2840                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
2841                 err = libbpf_get_error(obj->btf_ext);
2842                 if (err) {
2843                         pr_warn("Error loading ELF section %s: %d. Ignoring it and continuing.\n",
2844                                 BTF_EXT_ELF_SEC, err);
2845                         obj->btf_ext = NULL;
2846                         goto out;
2847                 }
2848
2849                 /* setup .BTF.ext to ELF section mapping */
2850                 ext_segs[0] = &obj->btf_ext->func_info;
2851                 ext_segs[1] = &obj->btf_ext->line_info;
2852                 ext_segs[2] = &obj->btf_ext->core_relo_info;
2853                 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
2854                         struct btf_ext_info *seg = ext_segs[seg_num];
2855                         const struct btf_ext_info_sec *sec;
2856                         const char *sec_name;
2857                         Elf_Scn *scn;
2858
2859                         if (seg->sec_cnt == 0)
2860                                 continue;
2861
2862                         seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
2863                         if (!seg->sec_idxs) {
2864                                 err = -ENOMEM;
2865                                 goto out;
2866                         }
2867
2868                         sec_num = 0;
2869                         for_each_btf_ext_sec(seg, sec) {
2870                                 /* increment index up front to avoid doing
2871                                  * it before every continue statement below
2872                                  */
2873                                 sec_num++;
2874
2875                                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
2876                                 if (str_is_empty(sec_name))
2877                                         continue;
2878                                 scn = elf_sec_by_name(obj, sec_name);
2879                                 if (!scn)
2880                                         continue;
2881
2882                                 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
2883                         }
2884                 }
2885         }
2886 out:
2887         if (err && libbpf_needs_btf(obj)) {
2888                 pr_warn("BTF is required, but is missing or corrupted.\n");
2889                 return err;
2890         }
2891         return 0;
2892 }
2893
2894 static int compare_vsi_off(const void *_a, const void *_b)
2895 {
2896         const struct btf_var_secinfo *a = _a;
2897         const struct btf_var_secinfo *b = _b;
2898
2899         return a->offset - b->offset;
2900 }
2901
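     /* Fix up a single DATASEC: fill in the section size and per-variable
      * offsets from ELF when the compiler left them as zeroes (the BPF
      * static linker fills them in itself), demote global/weak VARs with
      * hidden/internal visibility to static, and finally sort the secinfos
      * by offset.
      */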
2902 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
2903                              struct btf_type *t)
2904 {
2905         __u32 size = 0, i, vars = btf_vlen(t);
2906         const char *sec_name = btf__name_by_offset(btf, t->name_off);
2907         struct btf_var_secinfo *vsi;
2908         bool fixup_offsets = false;
2909         int err;
2910
2911         if (!sec_name) {
2912                 pr_debug("No name found in string section for DATASEC kind.\n");
2913                 return -ENOENT;
2914         }
2915
2916         /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
2917          * variable offsets set in the previous step. Further, not every
2918          * extern BTF VAR has a corresponding ELF symbol preserved, so we skip
2919          * all fixups altogether for such sections and go straight to sorting
2920          * VARs within their DATASEC.
2921          */
2922         if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
2923                 goto sort_vars;
2924
2925         /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
2926          * fix this up. But the BPF static linker already fixes this up and
2927          * fills in all the sizes and offsets during static linking, so this
2928          * step has to be optional. The STV_HIDDEN handling, however, is
2929          * non-optional for any non-extern DATASEC, so the variable fixup
2930          * loop below handles both tasks at the same time, paying the cost
2931          * of BTF VAR <-> ELF symbol matching just once.
2932          */
2933         if (t->size == 0) {
2934                 err = find_elf_sec_sz(obj, sec_name, &size);
2935                 if (err || !size) {
2936                         pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
2937                                  sec_name, size, err);
2938                         return -ENOENT;
2939                 }
2940
2941                 t->size = size;
2942                 fixup_offsets = true;
2943         }
2944
2945         for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
2946                 const struct btf_type *t_var;
2947                 struct btf_var *var;
2948                 const char *var_name;
2949                 Elf64_Sym *sym;
2950
2951                 t_var = btf__type_by_id(btf, vsi->type);
2952                 if (!t_var || !btf_is_var(t_var)) {
2953                         pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
2954                         return -EINVAL;
2955                 }
2956
2957                 var = btf_var(t_var);
2958                 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
2959                         continue;
2960
2961                 var_name = btf__name_by_offset(btf, t_var->name_off);
2962                 if (!var_name) {
2963                         pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
2964                                  sec_name, i);
2965                         return -ENOENT;
2966                 }
2967
2968                 sym = find_elf_var_sym(obj, var_name);
2969                 if (IS_ERR(sym)) {
2970                         pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
2971                                  sec_name, var_name);
2972                         return -ENOENT;
2973                 }
2974
2975                 if (fixup_offsets)
2976                         vsi->offset = sym->st_value;
2977
2978                 /* if a variable is a global/weak symbol, but has restricted
2979                  * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
2980                  * as static. This follows similar logic for functions (BPF
2981                  * subprogs) and influences libbpf's further decisions about
2982                  * whether to mark global data BPF array maps as
2983                  * BPF_F_MMAPABLE.
2984                  */
2985                 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
2986                     || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
2987                         var->linkage = BTF_VAR_STATIC;
2988         }
2989
2990 sort_vars:
2991         qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
2992         return 0;
2993 }
2994
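     /* Walk all BTF types once and apply the DATASEC fixups implemented
      * above; sizes and offsets come from the ELF data that the compiler
      * didn't have when emitting BTF.
      */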
2995 static int bpf_object_fixup_btf(struct bpf_object *obj)
2996 {
2997         int i, n, err = 0;
2998
2999         if (!obj->btf)
3000                 return 0;
3001
3002         n = btf__type_cnt(obj->btf);
3003         for (i = 1; i < n; i++) {
3004                 struct btf_type *t = btf_type_by_id(obj->btf, i);
3005
3006                 /* Loader needs to fix up some of the things compiler
3007                  * couldn't get its hands on while emitting BTF. This
3008                  * is section size and global variable offset. We use
3009                  * the info from the ELF itself for this purpose.
3010                  */
3011                 if (btf_is_datasec(t)) {
3012                         err = btf_fixup_datasec(obj, obj->btf, t);
3013                         if (err)
3014                                 return err;
3015                 }
3016         }
3017
3018         return 0;
3019 }
3020
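     /* struct_ops and LSM programs always need vmlinux BTF to resolve their
      * attach targets. Tracing programs need it too, unless they attach to
      * another BPF program (i.e., attach_prog_fd is set).
      */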
3021 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
3022 {
3023         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3024             prog->type == BPF_PROG_TYPE_LSM)
3025                 return true;
3026
3027         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3028          * also need vmlinux BTF
3029          */
3030         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3031                 return true;
3032
3033         return false;
3034 }
3035
3036 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3037 {
3038         struct bpf_program *prog;
3039         int i;
3040
3041         /* CO-RE relocations need kernel BTF, but only when btf_custom_path
3042          * is not specified
3043          */
3044         if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3045                 return true;
3046
3047         /* Support for typed ksyms needs kernel BTF */
3048         for (i = 0; i < obj->nr_extern; i++) {
3049                 const struct extern_desc *ext;
3050
3051                 ext = &obj->externs[i];
3052                 if (ext->type == EXT_KSYM && ext->ksym.type_id)
3053                         return true;
3054         }
3055
3056         bpf_object__for_each_program(prog, obj) {
3057                 if (!prog->autoload)
3058                         continue;
3059                 if (prog_needs_vmlinux_btf(prog))
3060                         return true;
3061         }
3062
3063         return false;
3064 }
3065
3066 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3067 {
3068         int err;
3069
3070         /* btf_vmlinux could be loaded earlier */
3071         if (obj->btf_vmlinux || obj->gen_loader)
3072                 return 0;
3073
3074         if (!force && !obj_needs_vmlinux_btf(obj))
3075                 return 0;
3076
3077         obj->btf_vmlinux = btf__load_vmlinux_btf();
3078         err = libbpf_get_error(obj->btf_vmlinux);
3079         if (err) {
3080                 pr_warn("Error loading vmlinux BTF: %d\n", err);
3081                 obj->btf_vmlinux = NULL;
3082                 return err;
3083         }
3084         return 0;
3085 }
3086
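     /* Upload the object's BTF into the kernel. Overall flow: (1) demote
      * subprogs marked __hidden from global to static FUNC linkage; (2) if
      * the kernel lacks some BTF features, sanitize a throwaway clone rather
      * than obj->btf itself; (3) load the (possibly cloned) BTF via
      * BPF_BTF_LOAD, or just record it when using the gen_loader; (4) on
      * success, hand the resulting FD over to obj->btf. Failure is fatal
      * only when kernel_needs_btf() is true.
      */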
3087 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3088 {
3089         struct btf *kern_btf = obj->btf;
3090         bool btf_mandatory, sanitize;
3091         int i, err = 0;
3092
3093         if (!obj->btf)
3094                 return 0;
3095
3096         if (!kernel_supports(obj, FEAT_BTF)) {
3097                 if (kernel_needs_btf(obj)) {
3098                         err = -EOPNOTSUPP;
3099                         goto report;
3100                 }
3101                 pr_debug("Kernel doesn't support BTF, skipping its upload.\n");
3102                 return 0;
3103         }
3104
3105         /* Even though some subprogs are global/weak, the user might prefer
3106          * the more permissive BPF verification process that the BPF verifier
3107          * performs for static functions, taking into account more context
3108          * from the caller functions. In that case, they need to mark such
3109          * subprogs with __attribute__((visibility("hidden"))) and libbpf
3110          * will adjust the corresponding FUNC BTF type to be marked as
3111          * static, triggering the more involved BPF verification process.
3112          */
3113         for (i = 0; i < obj->nr_programs; i++) {
3114                 struct bpf_program *prog = &obj->programs[i];
3115                 struct btf_type *t;
3116                 const char *name;
3117                 int j, n;
3118
3119                 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3120                         continue;
3121
3122                 n = btf__type_cnt(obj->btf);
3123                 for (j = 1; j < n; j++) {
3124                         t = btf_type_by_id(obj->btf, j);
3125                         if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3126                                 continue;
3127
3128                         name = btf__str_by_offset(obj->btf, t->name_off);
3129                         if (strcmp(name, prog->name) != 0)
3130                                 continue;
3131
3132                         t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3133                         break;
3134                 }
3135         }
3136
3137         sanitize = btf_needs_sanitization(obj);
3138         if (sanitize) {
3139                 const void *raw_data;
3140                 __u32 sz;
3141
3142                 /* clone BTF to sanitize a copy and leave the original intact */
3143                 raw_data = btf__raw_data(obj->btf, &sz);
3144                 kern_btf = btf__new(raw_data, sz);
3145                 err = libbpf_get_error(kern_btf);
3146                 if (err)
3147                         return err;
3148
3149                 /* enforce 8-byte pointers for BPF-targeted BTFs */
3150                 btf__set_pointer_size(obj->btf, 8);
3151                 err = bpf_object__sanitize_btf(obj, kern_btf);
3152                 if (err)
3153                         return err;
3154         }
3155
3156         if (obj->gen_loader) {
3157                 __u32 raw_size = 0;
3158                 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3159
3160                 if (!raw_data)
3161                         return -ENOMEM;
3162                 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3163                 /* Pretend to have a valid FD to pass various fd >= 0 checks.
3164                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3165                  */
3166                 btf__set_fd(kern_btf, 0);
3167         } else {
3168                 /* currently BPF_BTF_LOAD only supports log_level 1 */
3169                 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3170                                            obj->log_level ? 1 : 0);
3171         }
3172         if (sanitize) {
3173                 if (!err) {
3174                         /* move fd to libbpf's BTF */
3175                         btf__set_fd(obj->btf, btf__fd(kern_btf));
3176                         btf__set_fd(kern_btf, -1);
3177                 }
3178                 btf__free(kern_btf);
3179         }
3180 report:
3181         if (err) {
3182                 btf_mandatory = kernel_needs_btf(obj);
3183                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3184                         btf_mandatory ? "BTF is mandatory, can't proceed."
3185                                       : "BTF is optional, ignoring.");
3186                 if (!btf_mandatory)
3187                         err = 0;
3188         }
3189         return err;
3190 }
3191
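     /* elf_sym_str() and elf_sec_str() are thin wrappers around elf_strptr():
      * the former resolves string offsets against the symbol string table
      * (.strtab), the latter against the section-header string table
      * (.shstrtab).
      */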
3192 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3193 {
3194         const char *name;
3195
3196         name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3197         if (!name) {
3198                 pr_warn("elf: failed to get symbol name string at offset %zu from %s: %s\n",
3199                         off, obj->path, elf_errmsg(-1));
3200                 return NULL;
3201         }
3202
3203         return name;
3204 }
3205
3206 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3207 {
3208         const char *name;
3209
3210         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3211         if (!name) {
3212                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3213                         off, obj->path, elf_errmsg(-1));
3214                 return NULL;
3215         }
3216
3217         return name;
3218 }
3219
3220 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3221 {
3222         Elf_Scn *scn;
3223
3224         scn = elf_getscn(obj->efile.elf, idx);
3225         if (!scn) {
3226                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3227                         idx, obj->path, elf_errmsg(-1));
3228                 return NULL;
3229         }
3230         return scn;
3231 }
3232
3233 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3234 {
3235         Elf_Scn *scn = NULL;
3236         Elf *elf = obj->efile.elf;
3237         const char *sec_name;
3238
3239         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3240                 sec_name = elf_sec_name(obj, scn);
3241                 if (!sec_name)
3242                         return NULL;
3243
3244                 if (strcmp(sec_name, name) != 0)
3245                         continue;
3246
3247                 return scn;
3248         }
3249         return NULL;
3250 }
3251
3252 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3253 {
3254         Elf64_Shdr *shdr;
3255
3256         if (!scn)
3257                 return NULL;
3258
3259         shdr = elf64_getshdr(scn);
3260         if (!shdr) {
3261                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3262                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3263                 return NULL;
3264         }
3265
3266         return shdr;
3267 }
3268
3269 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3270 {
3271         const char *name;
3272         Elf64_Shdr *sh;
3273
3274         if (!scn)
3275                 return NULL;
3276
3277         sh = elf_sec_hdr(obj, scn);
3278         if (!sh)
3279                 return NULL;
3280
3281         name = elf_sec_str(obj, sh->sh_name);
3282         if (!name) {
3283                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3284                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3285                 return NULL;
3286         }
3287
3288         return name;
3289 }
3290
3291 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3292 {
3293         Elf_Data *data;
3294
3295         if (!scn)
3296                 return NULL;
3297
3298         data = elf_getdata(scn, 0);
3299         if (!data) {
3300                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3301                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3302                         obj->path, elf_errmsg(-1));
3303                 return NULL;
3304         }
3305
3306         return data;
3307 }
3308
3309 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3310 {
3311         if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3312                 return NULL;
3313
3314         return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3315 }
3316
3317 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3318 {
3319         if (idx >= data->d_size / sizeof(Elf64_Rel))
3320                 return NULL;
3321
3322         return (Elf64_Rel *)data->d_buf + idx;
3323 }
3324
3325 static bool is_sec_name_dwarf(const char *name)
3326 {
3327         /* approximation, but the actual list is too long */
3328         return str_has_pfx(name, ".debug_");
3329 }
3330
3331 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3332 {
3333         /* no special handling of .strtab */
3334         if (hdr->sh_type == SHT_STRTAB)
3335                 return true;
3336
3337         /* ignore .llvm_addrsig section as well */
3338         if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3339                 return true;
3340
3341         /* having no subprograms leads to an empty .text section; ignore it */
3342         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3343             strcmp(name, ".text") == 0)
3344                 return true;
3345
3346         /* DWARF sections */
3347         if (is_sec_name_dwarf(name))
3348                 return true;
3349
3350         if (str_has_pfx(name, ".rel")) {
3351                 name += sizeof(".rel") - 1;
3352                 /* DWARF section relocations */
3353                 if (is_sec_name_dwarf(name))
3354                         return true;
3355
3356                 /* .BTF and .BTF.ext don't need relocations */
3357                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3358                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
3359                         return true;
3360         }
3361
3362         return false;
3363 }
3364
3365 static int cmp_progs(const void *_a, const void *_b)
3366 {
3367         const struct bpf_program *a = _a;
3368         const struct bpf_program *b = _b;
3369
3370         if (a->sec_idx != b->sec_idx)
3371                 return a->sec_idx < b->sec_idx ? -1 : 1;
3372
3373         /* sec_insn_off can't be the same within the section */
3374         return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3375 }
3376
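     /* Main ELF scanning pass. The first loop only locates the symbol table,
      * since much of the parsing below depends on it; the second loop
      * classifies every remaining section: license, version, BTF-defined
      * maps, raw .BTF/.BTF.ext blobs, executable code, data sections
      * (.data/.rodata/.bss), struct_ops sections, and relocations. It
      * finishes by sorting discovered programs and parsing collected BTF.
      */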
3377 static int bpf_object__elf_collect(struct bpf_object *obj)
3378 {
3379         struct elf_sec_desc *sec_desc;
3380         Elf *elf = obj->efile.elf;
3381         Elf_Data *btf_ext_data = NULL;
3382         Elf_Data *btf_data = NULL;
3383         int idx = 0, err = 0;
3384         const char *name;
3385         Elf_Data *data;
3386         Elf_Scn *scn;
3387         Elf64_Shdr *sh;
3388
3389         /* ELF section indices are 0-based, but sec #0 is a special "invalid"
3390          * section. Since the section count retrieved by elf_getshdrnum()
3391          * includes sec #0, it is already the necessary size of an array to
3392          * hold all the sections.
3393          */
3394         if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3395                 pr_warn("elf: failed to get the number of sections for %s: %s\n",
3396                         obj->path, elf_errmsg(-1));
3397                 return -LIBBPF_ERRNO__FORMAT;
3398         }
3399         obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3400         if (!obj->efile.secs)
3401                 return -ENOMEM;
3402
3403         /* a bunch of ELF parsing functionality depends on processing symbols,
3404          * so do the first pass and find the symbol table
3405          */
3406         scn = NULL;
3407         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3408                 sh = elf_sec_hdr(obj, scn);
3409                 if (!sh)
3410                         return -LIBBPF_ERRNO__FORMAT;
3411
3412                 if (sh->sh_type == SHT_SYMTAB) {
3413                         if (obj->efile.symbols) {
3414                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3415                                 return -LIBBPF_ERRNO__FORMAT;
3416                         }
3417
3418                         data = elf_sec_data(obj, scn);
3419                         if (!data)
3420                                 return -LIBBPF_ERRNO__FORMAT;
3421
3422                         idx = elf_ndxscn(scn);
3423
3424                         obj->efile.symbols = data;
3425                         obj->efile.symbols_shndx = idx;
3426                         obj->efile.strtabidx = sh->sh_link;
3427                 }
3428         }
3429
3430         if (!obj->efile.symbols) {
3431                 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3432                         obj->path);
3433                 return -ENOENT;
3434         }
3435
3436         scn = NULL;
3437         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3438                 idx = elf_ndxscn(scn);
3439                 sec_desc = &obj->efile.secs[idx];
3440
3441                 sh = elf_sec_hdr(obj, scn);
3442                 if (!sh)
3443                         return -LIBBPF_ERRNO__FORMAT;
3444
3445                 name = elf_sec_str(obj, sh->sh_name);
3446                 if (!name)
3447                         return -LIBBPF_ERRNO__FORMAT;
3448
3449                 if (ignore_elf_section(sh, name))
3450                         continue;
3451
3452                 data = elf_sec_data(obj, scn);
3453                 if (!data)
3454                         return -LIBBPF_ERRNO__FORMAT;
3455
3456                 pr_debug("elf: section(%d) %s, size %lu, link %d, flags %lx, type=%d\n",
3457                          idx, name, (unsigned long)data->d_size,
3458                          (int)sh->sh_link, (unsigned long)sh->sh_flags,
3459                          (int)sh->sh_type);
3460
3461                 if (strcmp(name, "license") == 0) {
3462                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3463                         if (err)
3464                                 return err;
3465                 } else if (strcmp(name, "version") == 0) {
3466                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3467                         if (err)
3468                                 return err;
3469                 } else if (strcmp(name, "maps") == 0) {
3470                         pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3471                         return -ENOTSUP;
3472                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3473                         obj->efile.btf_maps_shndx = idx;
3474                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3475                         if (sh->sh_type != SHT_PROGBITS)
3476                                 return -LIBBPF_ERRNO__FORMAT;
3477                         btf_data = data;
3478                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3479                         if (sh->sh_type != SHT_PROGBITS)
3480                                 return -LIBBPF_ERRNO__FORMAT;
3481                         btf_ext_data = data;
3482                 } else if (sh->sh_type == SHT_SYMTAB) {
3483                         /* already processed during the first pass above */
3484                 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3485                         if (sh->sh_flags & SHF_EXECINSTR) {
3486                                 if (strcmp(name, ".text") == 0)
3487                                         obj->efile.text_shndx = idx;
3488                                 err = bpf_object__add_programs(obj, data, name, idx);
3489                                 if (err)
3490                                         return err;
3491                         } else if (strcmp(name, DATA_SEC) == 0 ||
3492                                    str_has_pfx(name, DATA_SEC ".")) {
3493                                 sec_desc->sec_type = SEC_DATA;
3494                                 sec_desc->shdr = sh;
3495                                 sec_desc->data = data;
3496                         } else if (strcmp(name, RODATA_SEC) == 0 ||
3497                                    str_has_pfx(name, RODATA_SEC ".")) {
3498                                 sec_desc->sec_type = SEC_RODATA;
3499                                 sec_desc->shdr = sh;
3500                                 sec_desc->data = data;
3501                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
3502                                 obj->efile.st_ops_data = data;
3503                                 obj->efile.st_ops_shndx = idx;
3504                         } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) {
3505                                 obj->efile.st_ops_link_data = data;
3506                                 obj->efile.st_ops_link_shndx = idx;
3507                         } else {
3508                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3509                                         idx, name);
3510                         }
3511                 } else if (sh->sh_type == SHT_REL) {
3512                         int targ_sec_idx = sh->sh_info; /* points to other section */
3513
3514                         if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3515                             targ_sec_idx >= obj->efile.sec_cnt)
3516                                 return -LIBBPF_ERRNO__FORMAT;
3517
3518                         /* Only do relo for section with exec instructions */
3519                         if (!section_have_execinstr(obj, targ_sec_idx) &&
3520                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3521                             strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3522                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
3523                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3524                                         idx, name, targ_sec_idx,
3525                                         elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3526                                 continue;
3527                         }
3528
3529                         sec_desc->sec_type = SEC_RELO;
3530                         sec_desc->shdr = sh;
3531                         sec_desc->data = data;
3532                 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
3533                                                          str_has_pfx(name, BSS_SEC "."))) {
3534                         sec_desc->sec_type = SEC_BSS;
3535                         sec_desc->shdr = sh;
3536                         sec_desc->data = data;
3537                 } else {
3538                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3539                                 (size_t)sh->sh_size);
3540                 }
3541         }
3542
3543         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3544                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3545                 return -LIBBPF_ERRNO__FORMAT;
3546         }
3547
3548         /* sort BPF programs by section index and in-section instruction
3549          * offset for faster search
3550          */
3551         if (obj->nr_programs)
3552                 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3553
3554         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3555 }
3556
3557 static bool sym_is_extern(const Elf64_Sym *sym)
3558 {
3559         int bind = ELF64_ST_BIND(sym->st_info);
3560         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3561         return sym->st_shndx == SHN_UNDEF &&
3562                (bind == STB_GLOBAL || bind == STB_WEAK) &&
3563                ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
3564 }
3565
3566 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
3567 {
3568         int bind = ELF64_ST_BIND(sym->st_info);
3569         int type = ELF64_ST_TYPE(sym->st_info);
3570
3571         /* in .text section */
3572         if (sym->st_shndx != text_shndx)
3573                 return false;
3574
3575         /* local function */
3576         if (bind == STB_LOCAL && type == STT_SECTION)
3577                 return true;
3578
3579         /* global function */
3580         return bind == STB_GLOBAL && type == STT_FUNC;
3581 }
3582
3583 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3584 {
3585         const struct btf_type *t;
3586         const char *tname;
3587         int i, n;
3588
3589         if (!btf)
3590                 return -ESRCH;
3591
3592         n = btf__type_cnt(btf);
3593         for (i = 1; i < n; i++) {
3594                 t = btf__type_by_id(btf, i);
3595
3596                 if (!btf_is_var(t) && !btf_is_func(t))
3597                         continue;
3598
3599                 tname = btf__name_by_offset(btf, t->name_off);
3600                 if (strcmp(tname, ext_name))
3601                         continue;
3602
3603                 if (btf_is_var(t) &&
3604                     btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3605                         return -EINVAL;
3606
3607                 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3608                         return -EINVAL;
3609
3610                 return i;
3611         }
3612
3613         return -ENOENT;
3614 }
3615
3616 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
3617         const struct btf_var_secinfo *vs;
3618         const struct btf_type *t;
3619         int i, j, n;
3620
3621         if (!btf)
3622                 return -ESRCH;
3623
3624         n = btf__type_cnt(btf);
3625         for (i = 1; i < n; i++) {
3626                 t = btf__type_by_id(btf, i);
3627
3628                 if (!btf_is_datasec(t))
3629                         continue;
3630
3631                 vs = btf_var_secinfos(t);
3632                 for (j = 0; j < btf_vlen(t); j++, vs++) {
3633                         if (vs->type == ext_btf_id)
3634                                 return i;
3635                 }
3636         }
3637
3638         return -ENOENT;
3639 }
3640
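     /* Classify the BTF type of a kcfg extern. A rough sketch, using
      * hypothetical extern declarations for illustration:
      *
      *   extern bool CONFIG_A __kconfig;                  -> KCFG_BOOL
      *   extern char CONFIG_B __kconfig;                  -> KCFG_CHAR
      *   extern int CONFIG_C __kconfig;                   -> KCFG_INT
      *                                       (power-of-2 sizes up to 8 bytes)
      *   extern enum libbpf_tristate CONFIG_D __kconfig;  -> KCFG_TRISTATE
      *   extern char CONFIG_E[16] __kconfig;              -> KCFG_CHAR_ARR
      *
      * Anything else maps to KCFG_UNKNOWN and is rejected by the caller.
      */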
3641 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3642                                      bool *is_signed)
3643 {
3644         const struct btf_type *t;
3645         const char *name;
3646
3647         t = skip_mods_and_typedefs(btf, id, NULL);
3648         name = btf__name_by_offset(btf, t->name_off);
3649
3650         if (is_signed)
3651                 *is_signed = false;
3652         switch (btf_kind(t)) {
3653         case BTF_KIND_INT: {
3654                 int enc = btf_int_encoding(t);
3655
3656                 if (enc & BTF_INT_BOOL)
3657                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3658                 if (is_signed)
3659                         *is_signed = enc & BTF_INT_SIGNED;
3660                 if (t->size == 1)
3661                         return KCFG_CHAR;
3662                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3663                         return KCFG_UNKNOWN;
3664                 return KCFG_INT;
3665         }
3666         case BTF_KIND_ENUM:
3667                 if (t->size != 4)
3668                         return KCFG_UNKNOWN;
3669                 if (strcmp(name, "libbpf_tristate"))
3670                         return KCFG_UNKNOWN;
3671                 return KCFG_TRISTATE;
3672         case BTF_KIND_ENUM64:
3673                 if (strcmp(name, "libbpf_tristate"))
3674                         return KCFG_UNKNOWN;
3675                 return KCFG_TRISTATE;
3676         case BTF_KIND_ARRAY:
3677                 if (btf_array(t)->nelems == 0)
3678                         return KCFG_UNKNOWN;
3679                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3680                         return KCFG_UNKNOWN;
3681                 return KCFG_CHAR_ARR;
3682         default:
3683                 return KCFG_UNKNOWN;
3684         }
3685 }
3686
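     /* The sort order below is chosen so that laying kcfg externs out
      * back-to-back (each rounded up to its alignment) produces a densely
      * packed .kconfig image: largest alignment first, then growing size
      * within an alignment class, with name as a deterministic tie-breaker.
      */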
3687 static int cmp_externs(const void *_a, const void *_b)
3688 {
3689         const struct extern_desc *a = _a;
3690         const struct extern_desc *b = _b;
3691
3692         if (a->type != b->type)
3693                 return a->type < b->type ? -1 : 1;
3694
3695         if (a->type == EXT_KCFG) {
3696                 /* descending order by alignment requirements */
3697                 if (a->kcfg.align != b->kcfg.align)
3698                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
3699                 /* ascending order by size, within same alignment class */
3700                 if (a->kcfg.sz != b->kcfg.sz)
3701                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3702         }
3703
3704         /* resolve ties by name */
3705         return strcmp(a->name, b->name);
3706 }
3707
3708 static int find_int_btf_id(const struct btf *btf)
3709 {
3710         const struct btf_type *t;
3711         int i, n;
3712
3713         n = btf__type_cnt(btf);
3714         for (i = 1; i < n; i++) {
3715                 t = btf__type_by_id(btf, i);
3716
3717                 if (btf_is_int(t) && btf_int_bits(t) == 32)
3718                         return i;
3719         }
3720
3721         return 0;
3722 }
3723
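     /* Kfunc externs in the .ksyms DATASEC are represented as FUNCs, but
      * DATASEC members are expected to be VARs. Add a single dummy int VAR
      * whose ID can later replace vs->type for every such FUNC entry (see
      * bpf_object__collect_externs()).
      */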
3724 static int add_dummy_ksym_var(struct btf *btf)
3725 {
3726         int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3727         const struct btf_var_secinfo *vs;
3728         const struct btf_type *sec;
3729
3730         if (!btf)
3731                 return 0;
3732
3733         sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3734                                             BTF_KIND_DATASEC);
3735         if (sec_btf_id < 0)
3736                 return 0;
3737
3738         sec = btf__type_by_id(btf, sec_btf_id);
3739         vs = btf_var_secinfos(sec);
3740         for (i = 0; i < btf_vlen(sec); i++, vs++) {
3741                 const struct btf_type *vt;
3742
3743                 vt = btf__type_by_id(btf, vs->type);
3744                 if (btf_is_func(vt))
3745                         break;
3746         }
3747
3748         /* No func in ksyms sec.  No need to add dummy var. */
3749         if (i == btf_vlen(sec))
3750                 return 0;
3751
3752         int_btf_id = find_int_btf_id(btf);
3753         dummy_var_btf_id = btf__add_var(btf,
3754                                         "dummy_ksym",
3755                                         BTF_VAR_GLOBAL_ALLOCATED,
3756                                         int_btf_id);
3757         if (dummy_var_btf_id < 0)
3758                 pr_warn("cannot create a dummy_ksym var\n");
3759
3760         return dummy_var_btf_id;
3761 }
3762
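     /* Discover all extern symbols (undefined NOTYPE global/weak ELF
      * symbols), match each one to its BTF VAR/FUNC and enclosing DATASEC,
      * and classify it as a .kconfig or .ksyms extern. Then rewrite both
      * DATASECs into a form the kernel accepts: ksym externs become
      * allocated int variables (or dummy VARs for kfuncs), while kcfg
      * externs get packed offsets within the future .kconfig map.
      */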
3763 static int bpf_object__collect_externs(struct bpf_object *obj)
3764 {
3765         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3766         const struct btf_type *t;
3767         struct extern_desc *ext;
3768         int i, n, off, dummy_var_btf_id;
3769         const char *ext_name, *sec_name;
3770         Elf_Scn *scn;
3771         Elf64_Shdr *sh;
3772
3773         if (!obj->efile.symbols)
3774                 return 0;
3775
3776         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3777         sh = elf_sec_hdr(obj, scn);
3778         if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
3779                 return -LIBBPF_ERRNO__FORMAT;
3780
3781         dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3782         if (dummy_var_btf_id < 0)
3783                 return dummy_var_btf_id;
3784
3785         n = sh->sh_size / sh->sh_entsize;
3786         pr_debug("looking for externs among %d symbols...\n", n);
3787
3788         for (i = 0; i < n; i++) {
3789                 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
3790
3791                 if (!sym)
3792                         return -LIBBPF_ERRNO__FORMAT;
3793                 if (!sym_is_extern(sym))
3794                         continue;
3795                 ext_name = elf_sym_str(obj, sym->st_name);
3796                 if (!ext_name || !ext_name[0])
3797                         continue;
3798
3799                 ext = obj->externs;
3800                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3801                 if (!ext)
3802                         return -ENOMEM;
3803                 obj->externs = ext;
3804                 ext = &ext[obj->nr_extern];
3805                 memset(ext, 0, sizeof(*ext));
3806                 obj->nr_extern++;
3807
3808                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3809                 if (ext->btf_id <= 0) {
3810                         pr_warn("failed to find BTF for extern '%s': %d\n",
3811                                 ext_name, ext->btf_id);
3812                         return ext->btf_id;
3813                 }
3814                 t = btf__type_by_id(obj->btf, ext->btf_id);
3815                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3816                 ext->sym_idx = i;
3817                 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
3818
3819                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3820                 if (ext->sec_btf_id <= 0) {
3821                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3822                                 ext_name, ext->btf_id, ext->sec_btf_id);
3823                         return ext->sec_btf_id;
3824                 }
3825                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3826                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3827
3828                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3829                         if (btf_is_func(t)) {
3830                                 pr_warn("extern function %s is unsupported under %s section\n",
3831                                         ext->name, KCONFIG_SEC);
3832                                 return -ENOTSUP;
3833                         }
3834                         kcfg_sec = sec;
3835                         ext->type = EXT_KCFG;
3836                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3837                         if (ext->kcfg.sz <= 0) {
3838                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3839                                         ext_name, ext->kcfg.sz);
3840                                 return ext->kcfg.sz;
3841                         }
3842                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3843                         if (ext->kcfg.align <= 0) {
3844                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3845                                         ext_name, ext->kcfg.align);
3846                                 return -EINVAL;
3847                         }
3848                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3849                                                         &ext->kcfg.is_signed);
3850                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3851                                 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
3852                                 return -ENOTSUP;
3853                         }
3854                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3855                         ksym_sec = sec;
3856                         ext->type = EXT_KSYM;
3857                         skip_mods_and_typedefs(obj->btf, t->type,
3858                                                &ext->ksym.type_id);
3859                 } else {
3860                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3861                         return -ENOTSUP;
3862                 }
3863         }
3864         pr_debug("collected %d externs total\n", obj->nr_extern);
3865
3866         if (!obj->nr_extern)
3867                 return 0;
3868
3869         /* sort externs by type, for kcfg ones also by (align, size, name) */
3870         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3871
3872         /* for .ksyms section, we need to turn all externs into allocated
3873          * variables in BTF to pass kernel verification; we do this by
3874          * pretending that each extern is an int-sized (4-byte) variable
3875          */
3876         if (ksym_sec) {
3877                 /* find existing 4-byte integer type in BTF to use for fake
3878                  * extern variables in DATASEC
3879                  */
3880                 int int_btf_id = find_int_btf_id(obj->btf);
3881                 /* For an extern function, the dummy_var added earlier
3882                  * will be used to replace vs->type, and its name
3883                  * string will be reused to fill in any
3884                  * missing param names.
3885                  */
3886                 const struct btf_type *dummy_var;
3887
3888                 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
3889                 for (i = 0; i < obj->nr_extern; i++) {
3890                         ext = &obj->externs[i];
3891                         if (ext->type != EXT_KSYM)
3892                                 continue;
3893                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3894                                  i, ext->sym_idx, ext->name);
3895                 }
3896
3897                 sec = ksym_sec;
3898                 n = btf_vlen(sec);
3899                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3900                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3901                         struct btf_type *vt;
3902
3903                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3904                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3905                         ext = find_extern_by_name(obj, ext_name);
3906                         if (!ext) {
3907                                 pr_warn("failed to find extern definition for BTF %s '%s'\n",
3908                                         btf_kind_str(vt), ext_name);
3909                                 return -ESRCH;
3910                         }
3911                         if (btf_is_func(vt)) {
3912                                 const struct btf_type *func_proto;
3913                                 struct btf_param *param;
3914                                 int j;
3915
3916                                 func_proto = btf__type_by_id(obj->btf,
3917                                                              vt->type);
3918                                 param = btf_params(func_proto);
3919                                 /* Reuse the dummy_var name string if a
3920                                  * func proto param does not have a name.
3921                                  */
3922                                 for (j = 0; j < btf_vlen(func_proto); j++)
3923                                         if (param[j].type && !param[j].name_off)
3924                                                 param[j].name_off =
3925                                                         dummy_var->name_off;
3926                                 vs->type = dummy_var_btf_id;
3927                                 vt->info &= ~0xffff;
3928                                 vt->info |= BTF_FUNC_GLOBAL;
3929                         } else {
3930                                 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3931                                 vt->type = int_btf_id;
3932                         }
3933                         vs->offset = off;
3934                         vs->size = sizeof(int);
3935                 }
3936                 sec->size = off;
3937         }
3938
3939         if (kcfg_sec) {
3940                 sec = kcfg_sec;
3941                 /* for kcfg externs calculate their offsets within a .kconfig map */
3942                 off = 0;
3943                 for (i = 0; i < obj->nr_extern; i++) {
3944                         ext = &obj->externs[i];
3945                         if (ext->type != EXT_KCFG)
3946                                 continue;
3947
3948                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3949                         off = ext->kcfg.data_off + ext->kcfg.sz;
3950                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3951                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3952                 }
3953                 sec->size = off;
3954                 n = btf_vlen(sec);
3955                 for (i = 0; i < n; i++) {
3956                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3957
3958                         t = btf__type_by_id(obj->btf, vs->type);
3959                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3960                         ext = find_extern_by_name(obj, ext_name);
3961                         if (!ext) {
3962                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3963                                         ext_name);
3964                                 return -ESRCH;
3965                         }
3966                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3967                         vs->offset = ext->kcfg.data_off;
3968                 }
3969         }
3970         return 0;
3971 }
3972
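     /* Any program living in .text is a subprog, unless it is the only
      * program in the object, in which case it is treated as the main
      * program.
      */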
3973 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
3974 {
3975         return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3976 }
3977
3978 struct bpf_program *
3979 bpf_object__find_program_by_name(const struct bpf_object *obj,
3980                                  const char *name)
3981 {
3982         struct bpf_program *prog;
3983
3984         bpf_object__for_each_program(prog, obj) {
3985                 if (prog_is_subprog(obj, prog))
3986                         continue;
3987                 if (!strcmp(prog->name, name))
3988                         return prog;
3989         }
3990         return errno = ENOENT, NULL;
3991 }
3992
3993 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3994                                       int shndx)
3995 {
3996         switch (obj->efile.secs[shndx].sec_type) {
3997         case SEC_BSS:
3998         case SEC_DATA:
3999         case SEC_RODATA:
4000                 return true;
4001         default:
4002                 return false;
4003         }
4004 }
4005
4006 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
4007                                       int shndx)
4008 {
4009         return shndx == obj->efile.btf_maps_shndx;
4010 }
4011
4012 static enum libbpf_map_type
4013 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
4014 {
4015         if (shndx == obj->efile.symbols_shndx)
4016                 return LIBBPF_MAP_KCONFIG;
4017
4018         switch (obj->efile.secs[shndx].sec_type) {
4019         case SEC_BSS:
4020                 return LIBBPF_MAP_BSS;
4021         case SEC_DATA:
4022                 return LIBBPF_MAP_DATA;
4023         case SEC_RODATA:
4024                 return LIBBPF_MAP_RODATA;
4025         default:
4026                 return LIBBPF_MAP_UNSPEC;
4027         }
4028 }
4029
4030 static int bpf_program__record_reloc(struct bpf_program *prog,
4031                                      struct reloc_desc *reloc_desc,
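     /* Record a single ELF relocation against an instruction in @prog.
      * Relocations fall into several classes: references to externs (kfunc
      * calls and ksym/kcfg ld64 loads), BPF subprog calls, subprog address
      * loads, and references to maps or global data sections.
      */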
4032                                      __u32 insn_idx, const char *sym_name,
4033                                      const Elf64_Sym *sym, const Elf64_Rel *rel)
4034 {
4035         struct bpf_insn *insn = &prog->insns[insn_idx];
4036         size_t map_idx, nr_maps = prog->obj->nr_maps;
4037         struct bpf_object *obj = prog->obj;
4038         __u32 shdr_idx = sym->st_shndx;
4039         enum libbpf_map_type type;
4040         const char *sym_sec_name;
4041         struct bpf_map *map;
4042
4043         if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4044                 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4045                         prog->name, sym_name, insn_idx, insn->code);
4046                 return -LIBBPF_ERRNO__RELOC;
4047         }
4048
4049         if (sym_is_extern(sym)) {
4050                 int sym_idx = ELF64_R_SYM(rel->r_info);
4051                 int i, n = obj->nr_extern;
4052                 struct extern_desc *ext;
4053
4054                 for (i = 0; i < n; i++) {
4055                         ext = &obj->externs[i];
4056                         if (ext->sym_idx == sym_idx)
4057                                 break;
4058                 }
4059                 if (i >= n) {
4060                         pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4061                                 prog->name, sym_name, sym_idx);
4062                         return -LIBBPF_ERRNO__RELOC;
4063                 }
4064                 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4065                          prog->name, i, ext->name, ext->sym_idx, insn_idx);
4066                 if (insn->code == (BPF_JMP | BPF_CALL))
4067                         reloc_desc->type = RELO_EXTERN_CALL;
4068                 else
4069                         reloc_desc->type = RELO_EXTERN_LD64;
4070                 reloc_desc->insn_idx = insn_idx;
4071                 reloc_desc->ext_idx = i;
4072                 return 0;
4073         }
4074
4075         /* sub-program call relocation */
4076         if (is_call_insn(insn)) {
4077                 if (insn->src_reg != BPF_PSEUDO_CALL) {
4078                         pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4079                         return -LIBBPF_ERRNO__RELOC;
4080                 }
4081                 /* text_shndx can be 0 if no default "main" program exists */
4082                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4083                         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4084                         pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4085                                 prog->name, sym_name, sym_sec_name);
4086                         return -LIBBPF_ERRNO__RELOC;
4087                 }
4088                 if (sym->st_value % BPF_INSN_SZ) {
4089                         pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4090                                 prog->name, sym_name, (size_t)sym->st_value);
4091                         return -LIBBPF_ERRNO__RELOC;
4092                 }
4093                 reloc_desc->type = RELO_CALL;
4094                 reloc_desc->insn_idx = insn_idx;
4095                 reloc_desc->sym_off = sym->st_value;
4096                 return 0;
4097         }
4098
4099         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4100                 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4101                         prog->name, sym_name, shdr_idx);
4102                 return -LIBBPF_ERRNO__RELOC;
4103         }
4104
4105         /* loading subprog addresses */
4106         if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4107                 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
4108                  * local_func: sym->st_value = 0, insn->imm = offset in the section.
4109                  */
4110                 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4111                         pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4112                                 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4113                         return -LIBBPF_ERRNO__RELOC;
4114                 }
4115
4116                 reloc_desc->type = RELO_SUBPROG_ADDR;
4117                 reloc_desc->insn_idx = insn_idx;
4118                 reloc_desc->sym_off = sym->st_value;
4119                 return 0;
4120         }
4121
4122         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4123         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4124
4125         /* generic map reference relocation */
4126         if (type == LIBBPF_MAP_UNSPEC) {
4127                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4128                         pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4129                                 prog->name, sym_name, sym_sec_name);
4130                         return -LIBBPF_ERRNO__RELOC;
4131                 }
4132                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4133                         map = &obj->maps[map_idx];
4134                         if (map->libbpf_type != type ||
4135                             map->sec_idx != sym->st_shndx ||
4136                             map->sec_offset != sym->st_value)
4137                                 continue;
4138                         pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4139                                  prog->name, map_idx, map->name, map->sec_idx,
4140                                  map->sec_offset, insn_idx);
4141                         break;
4142                 }
4143                 if (map_idx >= nr_maps) {
4144                         pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4145                                 prog->name, sym_sec_name, (size_t)sym->st_value);
4146                         return -LIBBPF_ERRNO__RELOC;
4147                 }
4148                 reloc_desc->type = RELO_LD64;
4149                 reloc_desc->insn_idx = insn_idx;
4150                 reloc_desc->map_idx = map_idx;
4151                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4152                 return 0;
4153         }
4154
4155         /* global data map relocation */
4156         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4157                 pr_warn("prog '%s': bad data relo against section '%s'\n",
4158                         prog->name, sym_sec_name);
4159                 return -LIBBPF_ERRNO__RELOC;
4160         }
4161         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4162                 map = &obj->maps[map_idx];
4163                 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4164                         continue;
4165                 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4166                          prog->name, map_idx, map->name, map->sec_idx,
4167                          map->sec_offset, insn_idx);
4168                 break;
4169         }
4170         if (map_idx >= nr_maps) {
4171                 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4172                         prog->name, sym_sec_name);
4173                 return -LIBBPF_ERRNO__RELOC;
4174         }
4175
4176         reloc_desc->type = RELO_DATA;
4177         reloc_desc->insn_idx = insn_idx;
4178         reloc_desc->map_idx = map_idx;
4179         reloc_desc->sym_off = sym->st_value;
4180         return 0;
4181 }
4182
4183 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4184 {
4185         return insn_idx >= prog->sec_insn_off &&
4186                insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4187 }
4188
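/* Binary-search obj->programs (sorted by section index and instruction
 * offset within the section) for the program containing instruction
 * 'insn_idx' of section 'sec_idx'; returns NULL if no program matches.
 */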
4189 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4190                                                  size_t sec_idx, size_t insn_idx)
4191 {
4192         int l = 0, r = obj->nr_programs - 1, m;
4193         struct bpf_program *prog;
4194
4195         if (!obj->nr_programs)
4196                 return NULL;
4197
4198         while (l < r) {
4199                 m = l + (r - l + 1) / 2;
4200                 prog = &obj->programs[m];
4201
4202                 if (prog->sec_idx < sec_idx ||
4203                     (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4204                         l = m;
4205                 else
4206                         r = m - 1;
4207         }
4208         /* the matching program could be at index l, but it still might be
4209          * the wrong one, so double-check the conditions one last time
4210          */
4211         prog = &obj->programs[l];
4212         if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4213                 return prog;
4214         return NULL;
4215 }
4216
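/* Process one .rel* ELF section: validate each relocation record and its
 * symbol, locate the BPF program owning the relocated instruction, and
 * append a reloc_desc to that program via bpf_program__record_reloc().
 */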
4217 static int
4218 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4219 {
4220         const char *relo_sec_name, *sec_name;
4221         size_t sec_idx = shdr->sh_info, sym_idx;
4222         struct bpf_program *prog;
4223         struct reloc_desc *relos;
4224         int err, i, nrels;
4225         const char *sym_name;
4226         __u32 insn_idx;
4227         Elf_Scn *scn;
4228         Elf_Data *scn_data;
4229         Elf64_Sym *sym;
4230         Elf64_Rel *rel;
4231
4232         if (sec_idx >= obj->efile.sec_cnt)
4233                 return -EINVAL;
4234
4235         scn = elf_sec_by_idx(obj, sec_idx);
4236         scn_data = elf_sec_data(obj, scn);
4237
4238         relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4239         sec_name = elf_sec_name(obj, scn);
4240         if (!relo_sec_name || !sec_name)
4241                 return -EINVAL;
4242
4243         pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4244                  relo_sec_name, sec_idx, sec_name);
4245         nrels = shdr->sh_size / shdr->sh_entsize;
4246
4247         for (i = 0; i < nrels; i++) {
4248                 rel = elf_rel_by_idx(data, i);
4249                 if (!rel) {
4250                         pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4251                         return -LIBBPF_ERRNO__FORMAT;
4252                 }
4253
4254                 sym_idx = ELF64_R_SYM(rel->r_info);
4255                 sym = elf_sym_by_idx(obj, sym_idx);
4256                 if (!sym) {
4257                         pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4258                                 relo_sec_name, sym_idx, i);
4259                         return -LIBBPF_ERRNO__FORMAT;
4260                 }
4261
4262                 if (sym->st_shndx >= obj->efile.sec_cnt) {
4263                         pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4264                                 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4265                         return -LIBBPF_ERRNO__FORMAT;
4266                 }
4267
4268                 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4269                         pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4270                                 relo_sec_name, (size_t)rel->r_offset, i);
4271                         return -LIBBPF_ERRNO__FORMAT;
4272                 }
4273
4274                 insn_idx = rel->r_offset / BPF_INSN_SZ;
4275                 /* relocations against static functions are recorded as
4276                  * relocations against the section that contains the function;
4277                  * in such a case, the symbol will be STT_SECTION and sym.st_name
4278                  * will point to the empty string (0), so fetch the section name
4279                  * instead
4280                  */
4281                 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4282                         sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4283                 else
4284                         sym_name = elf_sym_str(obj, sym->st_name);
4285                 sym_name = sym_name ?: "<?";
4286
4287                 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4288                          relo_sec_name, i, insn_idx, sym_name);
4289
4290                 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4291                 if (!prog) {
4292                         pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4293                                 relo_sec_name, i, sec_name, insn_idx);
4294                         continue;
4295                 }
4296
4297                 relos = libbpf_reallocarray(prog->reloc_desc,
4298                                             prog->nr_reloc + 1, sizeof(*relos));
4299                 if (!relos)
4300                         return -ENOMEM;
4301                 prog->reloc_desc = relos;
4302
4303                 /* adjust insn_idx to local BPF program frame of reference */
4304                 insn_idx -= prog->sec_insn_off;
4305                 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4306                                                 insn_idx, sym_name, sym, rel);
4307                 if (err)
4308                         return err;
4309
4310                 prog->nr_reloc++;
4311         }
4312         return 0;
4313 }
4314
4315 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
4316 {
4317         int id;
4318
4319         if (!obj->btf)
4320                 return -ENOENT;
4321
4322         /* if it's a BTF-defined map, we don't need to search for type IDs.
4323          * A struct_ops map needs neither btf_key_type_id nor
4324          * btf_value_type_id.
4325          */
4326         if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4327                 return 0;
4328
4329         /*
4330          * LLVM annotates global data differently in BTF, that is,
4331          * only as '.data', '.bss' or '.rodata'.
4332          */
4333         if (!bpf_map__is_internal(map))
4334                 return -ENOENT;
4335
4336         id = btf__find_by_name(obj->btf, map->real_name);
4337         if (id < 0)
4338                 return id;
4339
4340         map->btf_key_type_id = 0;
4341         map->btf_value_type_id = id;
4342         return 0;
4343 }
4344
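/* Fallback for kernels without BPF_OBJ_GET_INFO_BY_FD support: recover
 * basic map parameters by parsing /proc/<pid>/fdinfo/<fd>.
 */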
4345 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4346 {
4347         char file[PATH_MAX], buff[4096];
4348         FILE *fp;
4349         __u32 val;
4350         int err;
4351
4352         snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4353         memset(info, 0, sizeof(*info));
4354
4355         fp = fopen(file, "re");
4356         if (!fp) {
4357                 err = -errno;
4358                 pr_warn("failed to open %s: %d. No procfs support?\n", file,
4359                         err);
4360                 return err;
4361         }
4362
4363         while (fgets(buff, sizeof(buff), fp)) {
4364                 if (sscanf(buff, "map_type:\t%u", &val) == 1)
4365                         info->type = val;
4366                 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4367                         info->key_size = val;
4368                 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4369                         info->value_size = val;
4370                 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4371                         info->max_entries = val;
4372                 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4373                         info->map_flags = val;
4374         }
4375
4376         fclose(fp);
4377
4378         return 0;
4379 }
4380
4381 bool bpf_map__autocreate(const struct bpf_map *map)
4382 {
4383         return map->autocreate;
4384 }
4385
4386 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4387 {
4388         if (map->obj->loaded)
4389                 return libbpf_err(-EBUSY);
4390
4391         map->autocreate = autocreate;
4392         return 0;
4393 }
4394
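/* Make 'map' reuse an existing map's FD instead of creating a new map on
 * load. A minimal usage sketch (the map and pin path names are hypothetical,
 * error handling elided), typically done before bpf_object__load():
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");
 *	int pin_fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *
 *	if (map && pin_fd >= 0)
 *		err = bpf_map__reuse_fd(map, pin_fd);
 */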
4395 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4396 {
4397         struct bpf_map_info info;
4398         __u32 len = sizeof(info), name_len;
4399         int new_fd, err;
4400         char *new_name;
4401
4402         memset(&info, 0, len);
4403         err = bpf_map_get_info_by_fd(fd, &info, &len);
4404         if (err && errno == EINVAL)
4405                 err = bpf_get_map_info_from_fdinfo(fd, &info);
4406         if (err)
4407                 return libbpf_err(err);
4408
4409         name_len = strlen(info.name);
4410         if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4411                 new_name = strdup(map->name);
4412         else
4413                 new_name = strdup(info.name);
4414
4415         if (!new_name)
4416                 return libbpf_err(-errno);
4417
4418         /*
4419          * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
4420          * This is similar to what we do in ensure_good_fd(), but without
4421          * closing original FD.
4422          */
4423         new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
4424         if (new_fd < 0) {
4425                 err = -errno;
4426                 goto err_free_new_name;
4427         }
4428
4429         err = zclose(map->fd);
4430         if (err) {
4431                 err = -errno;
4432                 goto err_close_new_fd;
4433         }
4434         free(map->name);
4435
4436         map->fd = new_fd;
4437         map->name = new_name;
4438         map->def.type = info.type;
4439         map->def.key_size = info.key_size;
4440         map->def.value_size = info.value_size;
4441         map->def.max_entries = info.max_entries;
4442         map->def.map_flags = info.map_flags;
4443         map->btf_key_type_id = info.btf_key_type_id;
4444         map->btf_value_type_id = info.btf_value_type_id;
4445         map->reused = true;
4446         map->map_extra = info.map_extra;
4447
4448         return 0;
4449
4450 err_close_new_fd:
4451         close(new_fd);
4452 err_free_new_name:
4453         free(new_name);
4454         return libbpf_err(err);
4455 }
4456
4457 __u32 bpf_map__max_entries(const struct bpf_map *map)
4458 {
4459         return map->def.max_entries;
4460 }
4461
4462 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4463 {
4464         if (!bpf_map_type__is_map_in_map(map->def.type))
4465                 return errno = EINVAL, NULL;
4466
4467         return map->inner_map;
4468 }
4469
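/* Usage sketch (hypothetical 'rb' ringbuf map; must be called before the
 * object is loaded, otherwise -EBUSY is returned):
 *
 *	err = bpf_map__set_max_entries(rb, 64 * 1024);
 *
 * For BPF_MAP_TYPE_RINGBUF maps the requested size is auto-adjusted to a
 * page-size multiple, as the code below does.
 */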
4470 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4471 {
4472         if (map->obj->loaded)
4473                 return libbpf_err(-EBUSY);
4474
4475         map->def.max_entries = max_entries;
4476
4477         /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
4478         if (map_is_ringbuf(map))
4479                 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
4480
4481         return 0;
4482 }
4483
4484 static int
4485 bpf_object__probe_loading(struct bpf_object *obj)
4486 {
4487         char *cp, errmsg[STRERR_BUFSIZE];
4488         struct bpf_insn insns[] = {
4489                 BPF_MOV64_IMM(BPF_REG_0, 0),
4490                 BPF_EXIT_INSN(),
4491         };
4492         int ret, insn_cnt = ARRAY_SIZE(insns);
4493
4494         if (obj->gen_loader)
4495                 return 0;
4496
4497         ret = bump_rlimit_memlock();
4498         if (ret)
4499                 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
4500
4501         /* make sure basic loading works */
4502         ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4503         if (ret < 0)
4504                 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
4505         if (ret < 0) {
4506                 ret = errno;
4507                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4508                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4509                         "program. Make sure your kernel supports BPF "
4510                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4511                         "set to a big enough value.\n", __func__, cp, ret);
4512                 return -ret;
4513         }
4514         close(ret);
4515
4516         return 0;
4517 }
4518
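/* Consume a probe's raw result: close the FD if one was returned and
 * report whether the probed feature is supported (1) or not (0).
 */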
4519 static int probe_fd(int fd)
4520 {
4521         if (fd >= 0)
4522                 close(fd);
4523         return fd >= 0;
4524 }
4525
4526 static int probe_kern_prog_name(void)
4527 {
4528         const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
4529         struct bpf_insn insns[] = {
4530                 BPF_MOV64_IMM(BPF_REG_0, 0),
4531                 BPF_EXIT_INSN(),
4532         };
4533         union bpf_attr attr;
4534         int ret;
4535
4536         memset(&attr, 0, attr_sz);
4537         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
4538         attr.license = ptr_to_u64("GPL");
4539         attr.insns = ptr_to_u64(insns);
4540         attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
4541         libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
4542
4543         /* make sure loading with name works */
4544         ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
4545         return probe_fd(ret);
4546 }
4547
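/* Probe global data support: create a small ARRAY map and load a program
 * that stores into its value through a BPF_LD_MAP_VALUE ld_imm64
 * instruction (direct map value access).
 */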
4548 static int probe_kern_global_data(void)
4549 {
4550         char *cp, errmsg[STRERR_BUFSIZE];
4551         struct bpf_insn insns[] = {
4552                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
4553                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
4554                 BPF_MOV64_IMM(BPF_REG_0, 0),
4555                 BPF_EXIT_INSN(),
4556         };
4557         int ret, map, insn_cnt = ARRAY_SIZE(insns);
4558
4559         map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
4560         if (map < 0) {
4561                 ret = -errno;
4562                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4563                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4564                         __func__, cp, -ret);
4565                 return ret;
4566         }
4567
4568         insns[0].imm = map;
4569
4570         ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4571         close(map);
4572         return probe_fd(ret);
4573 }
4574
4575 static int probe_kern_btf(void)
4576 {
4577         static const char strs[] = "\0int";
4578         __u32 types[] = {
4579                 /* int */
4580                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4581         };
4582
4583         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4584                                              strs, sizeof(strs)));
4585 }
4586
4587 static int probe_kern_btf_func(void)
4588 {
4589         static const char strs[] = "\0int\0x\0a";
4590         /* void x(int a) {} */
4591         __u32 types[] = {
4592                 /* int */
4593                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4594                 /* FUNC_PROTO */                                /* [2] */
4595                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4596                 BTF_PARAM_ENC(7, 1),
4597                 /* FUNC x */                                    /* [3] */
4598                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
4599         };
4600
4601         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4602                                              strs, sizeof(strs)));
4603 }
4604
4605 static int probe_kern_btf_func_global(void)
4606 {
4607         static const char strs[] = "\0int\0x\0a";
4608         /* global function: void x(int a) {} */
4609         __u32 types[] = {
4610                 /* int */
4611                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4612                 /* FUNC_PROTO */                                /* [2] */
4613                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
4614                 BTF_PARAM_ENC(7, 1),
4615                 /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
4616                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
4617         };
4618
4619         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4620                                              strs, sizeof(strs)));
4621 }
4622
4623 static int probe_kern_btf_datasec(void)
4624 {
4625         static const char strs[] = "\0x\0.data";
4626         /* static int a; */
4627         __u32 types[] = {
4628                 /* int */
4629                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4630                 /* VAR x */                                     /* [2] */
4631                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4632                 BTF_VAR_STATIC,
4633                 /* DATASEC val */                               /* [3] */
4634                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
4635                 BTF_VAR_SECINFO_ENC(2, 0, 4),
4636         };
4637
4638         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4639                                              strs, sizeof(strs)));
4640 }
4641
4642 static int probe_kern_btf_float(void)
4643 {
4644         static const char strs[] = "\0float";
4645         __u32 types[] = {
4646                 /* float */
4647                 BTF_TYPE_FLOAT_ENC(1, 4),
4648         };
4649
4650         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4651                                              strs, sizeof(strs)));
4652 }
4653
4654 static int probe_kern_btf_decl_tag(void)
4655 {
4656         static const char strs[] = "\0tag";
4657         __u32 types[] = {
4658                 /* int */
4659                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
4660                 /* VAR x */                                     /* [2] */
4661                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
4662                 BTF_VAR_STATIC,
4663                 /* attr */
4664                 BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
4665         };
4666
4667         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4668                                              strs, sizeof(strs)));
4669 }
4670
4671 static int probe_kern_btf_type_tag(void)
4672 {
4673         static const char strs[] = "\0tag";
4674         __u32 types[] = {
4675                 /* int */
4676                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),          /* [1] */
4677                 /* attr */
4678                 BTF_TYPE_TYPE_TAG_ENC(1, 1),                            /* [2] */
4679                 /* ptr */
4680                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),   /* [3] */
4681         };
4682
4683         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4684                                              strs, sizeof(strs)));
4685 }
4686
4687 static int probe_kern_array_mmap(void)
4688 {
4689         LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
4690         int fd;
4691
4692         fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
4693         return probe_fd(fd);
4694 }
4695
4696 static int probe_kern_exp_attach_type(void)
4697 {
4698         LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
4699         struct bpf_insn insns[] = {
4700                 BPF_MOV64_IMM(BPF_REG_0, 0),
4701                 BPF_EXIT_INSN(),
4702         };
4703         int fd, insn_cnt = ARRAY_SIZE(insns);
4704
4705         /* use any valid combination of program type and (optional)
4706          * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS,
4707          * which is 0) to see if the kernel supports the expected_attach_type
4708          * field for the BPF_PROG_LOAD command
4709          */
4710         fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
4711         return probe_fd(fd);
4712 }
4713
4714 static int probe_kern_probe_read_kernel(void)
4715 {
4716         struct bpf_insn insns[] = {
4717                 BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
4718                 BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
4719                 BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
4720                 BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
4721                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
4722                 BPF_EXIT_INSN(),
4723         };
4724         int fd, insn_cnt = ARRAY_SIZE(insns);
4725
4726         fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
4727         return probe_fd(fd);
4728 }
4729
4730 static int probe_prog_bind_map(void)
4731 {
4732         char *cp, errmsg[STRERR_BUFSIZE];
4733         struct bpf_insn insns[] = {
4734                 BPF_MOV64_IMM(BPF_REG_0, 0),
4735                 BPF_EXIT_INSN(),
4736         };
4737         int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
4738
4739         map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
4740         if (map < 0) {
4741                 ret = -errno;
4742                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4743                 pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
4744                         __func__, cp, -ret);
4745                 return ret;
4746         }
4747
4748         prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
4749         if (prog < 0) {
4750                 close(map);
4751                 return 0;
4752         }
4753
4754         ret = bpf_prog_bind_map(prog, map, NULL);
4755
4756         close(map);
4757         close(prog);
4758
4759         return ret >= 0;
4760 }
4761
4762 static int probe_module_btf(void)
4763 {
4764         static const char strs[] = "\0int";
4765         __u32 types[] = {
4766                 /* int */
4767                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
4768         };
4769         struct bpf_btf_info info;
4770         __u32 len = sizeof(info);
4771         char name[16];
4772         int fd, err;
4773
4774         fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
4775         if (fd < 0)
4776                 return 0; /* BTF not supported at all */
4777
4778         memset(&info, 0, sizeof(info));
4779         info.name = ptr_to_u64(name);
4780         info.name_len = sizeof(name);
4781
4782         /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying a name pointer;
4783          * the kernel's module BTF support coincides with support for the
4784          * name/name_len fields in struct bpf_btf_info.
4785          */
4786         err = bpf_btf_get_info_by_fd(fd, &info, &len);
4787         close(fd);
4788         return !err;
4789 }
4790
4791 static int probe_perf_link(void)
4792 {
4793         struct bpf_insn insns[] = {
4794                 BPF_MOV64_IMM(BPF_REG_0, 0),
4795                 BPF_EXIT_INSN(),
4796         };
4797         int prog_fd, link_fd, err;
4798
4799         prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
4800                                 insns, ARRAY_SIZE(insns), NULL);
4801         if (prog_fd < 0)
4802                 return -errno;
4803
4804         /* use an invalid perf_event FD to get EBADF if BPF link is supported;
4805          * otherwise EINVAL should be returned
4806          */
4807         link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
4808         err = -errno; /* close() can clobber errno */
4809
4810         if (link_fd >= 0)
4811                 close(link_fd);
4812         close(prog_fd);
4813
4814         return link_fd < 0 && err == -EBADF;
4815 }
4816
4817 static int probe_kern_bpf_cookie(void)
4818 {
4819         struct bpf_insn insns[] = {
4820                 BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
4821                 BPF_EXIT_INSN(),
4822         };
4823         int ret, insn_cnt = ARRAY_SIZE(insns);
4824
4825         ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
4826         return probe_fd(ret);
4827 }
4828
4829 static int probe_kern_btf_enum64(void)
4830 {
4831         static const char strs[] = "\0enum64";
4832         __u32 types[] = {
4833                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
4834         };
4835
4836         return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
4837                                              strs, sizeof(strs)));
4838 }
4839
4840 static int probe_kern_syscall_wrapper(void);
4841
4842 enum kern_feature_result {
4843         FEAT_UNKNOWN = 0,
4844         FEAT_SUPPORTED = 1,
4845         FEAT_MISSING = 2,
4846 };
4847
4848 typedef int (*feature_probe_fn)(void);
4849
4850 static struct kern_feature_desc {
4851         const char *desc;
4852         feature_probe_fn probe;
4853         enum kern_feature_result res;
4854 } feature_probes[__FEAT_CNT] = {
4855         [FEAT_PROG_NAME] = {
4856                 "BPF program name", probe_kern_prog_name,
4857         },
4858         [FEAT_GLOBAL_DATA] = {
4859                 "global variables", probe_kern_global_data,
4860         },
4861         [FEAT_BTF] = {
4862                 "minimal BTF", probe_kern_btf,
4863         },
4864         [FEAT_BTF_FUNC] = {
4865                 "BTF functions", probe_kern_btf_func,
4866         },
4867         [FEAT_BTF_GLOBAL_FUNC] = {
4868                 "BTF global function", probe_kern_btf_func_global,
4869         },
4870         [FEAT_BTF_DATASEC] = {
4871                 "BTF data section and variable", probe_kern_btf_datasec,
4872         },
4873         [FEAT_ARRAY_MMAP] = {
4874                 "ARRAY map mmap()", probe_kern_array_mmap,
4875         },
4876         [FEAT_EXP_ATTACH_TYPE] = {
4877                 "BPF_PROG_LOAD expected_attach_type attribute",
4878                 probe_kern_exp_attach_type,
4879         },
4880         [FEAT_PROBE_READ_KERN] = {
4881                 "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
4882         },
4883         [FEAT_PROG_BIND_MAP] = {
4884                 "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
4885         },
4886         [FEAT_MODULE_BTF] = {
4887                 "module BTF support", probe_module_btf,
4888         },
4889         [FEAT_BTF_FLOAT] = {
4890                 "BTF_KIND_FLOAT support", probe_kern_btf_float,
4891         },
4892         [FEAT_PERF_LINK] = {
4893                 "BPF perf link support", probe_perf_link,
4894         },
4895         [FEAT_BTF_DECL_TAG] = {
4896                 "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
4897         },
4898         [FEAT_BTF_TYPE_TAG] = {
4899                 "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
4900         },
4901         [FEAT_MEMCG_ACCOUNT] = {
4902                 "memcg-based memory accounting", probe_memcg_account,
4903         },
4904         [FEAT_BPF_COOKIE] = {
4905                 "BPF cookie support", probe_kern_bpf_cookie,
4906         },
4907         [FEAT_BTF_ENUM64] = {
4908                 "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
4909         },
4910         [FEAT_SYSCALL_WRAPPER] = {
4911                 "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
4912         },
4913 };
4914
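/* Lazily run the probe for 'feat_id' and cache the verdict process-wide in
 * feature_probes[]. A sketched internal use:
 *
 *	if (kernel_supports(obj, FEAT_BTF_FUNC))
 *		... include BTF_KIND_FUNC info when loading BTF ...
 */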
4915 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4916 {
4917         struct kern_feature_desc *feat = &feature_probes[feat_id];
4918         int ret;
4919
4920         if (obj && obj->gen_loader)
4921                 /* When generating a loader program, assume the latest kernel
4922                  * to avoid doing extra prog_load and map_create syscalls.
4923                  */
4924                 return true;
4925
4926         if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
4927                 ret = feat->probe();
4928                 if (ret > 0) {
4929                         WRITE_ONCE(feat->res, FEAT_SUPPORTED);
4930                 } else if (ret == 0) {
4931                         WRITE_ONCE(feat->res, FEAT_MISSING);
4932                 } else {
4933                         pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
4934                         WRITE_ONCE(feat->res, FEAT_MISSING);
4935                 }
4936         }
4937
4938         return READ_ONCE(feat->res) == FEAT_SUPPORTED;
4939 }
4940
4941 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4942 {
4943         struct bpf_map_info map_info;
4944         char msg[STRERR_BUFSIZE];
4945         __u32 map_info_len = sizeof(map_info);
4946         int err;
4947
4948         memset(&map_info, 0, map_info_len);
4949         err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
4950         if (err && errno == EINVAL)
4951                 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4952         if (err) {
4953                 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4954                         libbpf_strerror_r(errno, msg, sizeof(msg)));
4955                 return false;
4956         }
4957
4958         return (map_info.type == map->def.type &&
4959                 map_info.key_size == map->def.key_size &&
4960                 map_info.value_size == map->def.value_size &&
4961                 map_info.max_entries == map->def.max_entries &&
4962                 map_info.map_flags == map->def.map_flags &&
4963                 map_info.map_extra == map->map_extra);
4964 }
4965
4966 static int
4967 bpf_object__reuse_map(struct bpf_map *map)
4968 {
4969         char *cp, errmsg[STRERR_BUFSIZE];
4970         int err, pin_fd;
4971
4972         pin_fd = bpf_obj_get(map->pin_path);
4973         if (pin_fd < 0) {
4974                 err = -errno;
4975                 if (err == -ENOENT) {
4976                         pr_debug("found no pinned map to reuse at '%s'\n",
4977                                  map->pin_path);
4978                         return 0;
4979                 }
4980
4981                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4982                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4983                         map->pin_path, cp);
4984                 return err;
4985         }
4986
4987         if (!map_is_reuse_compat(map, pin_fd)) {
4988                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4989                         map->pin_path);
4990                 close(pin_fd);
4991                 return -EINVAL;
4992         }
4993
4994         err = bpf_map__reuse_fd(map, pin_fd);
4995         close(pin_fd);
4996         if (err)
4997                 return err;
4998
4999         map->pinned = true;
5000         pr_debug("reused pinned map at '%s'\n", map->pin_path);
5001
5002         return 0;
5003 }
5004
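/* Copy the initial contents of an internal map (.data, .rodata, .bss,
 * .kconfig) from its mmap()'ed region into the kernel map, freezing
 * read-only maps against later updates from the syscall side.
 */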
5005 static int
5006 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
5007 {
5008         enum libbpf_map_type map_type = map->libbpf_type;
5009         char *cp, errmsg[STRERR_BUFSIZE];
5010         int err, zero = 0;
5011
5012         if (obj->gen_loader) {
5013                 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
5014                                          map->mmaped, map->def.value_size);
5015                 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
5016                         bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
5017                 return 0;
5018         }
5019         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
5020         if (err) {
5021                 err = -errno;
5022                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5023                 pr_warn("Error setting initial map(%s) contents: %s\n",
5024                         map->name, cp);
5025                 return err;
5026         }
5027
5028         /* Freeze .rodata and .kconfig maps as read-only from the syscall side. */
5029         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
5030                 err = bpf_map_freeze(map->fd);
5031                 if (err) {
5032                         err = -errno;
5033                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5034                         pr_warn("Error freezing map(%s) as read-only: %s\n",
5035                                 map->name, cp);
5036                         return err;
5037                 }
5038         }
5039         return 0;
5040 }
5041
5042 static void bpf_map__destroy(struct bpf_map *map);
5043
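/* Create the kernel map for 'map'. For map-in-map types, the inner map is
 * created first and linked through inner_map_fd. If creation with BTF
 * key/value type info fails, it is retried once without BTF, since older
 * kernels reject map BTF.
 */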
5044 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
5045 {
5046         LIBBPF_OPTS(bpf_map_create_opts, create_attr);
5047         struct bpf_map_def *def = &map->def;
5048         const char *map_name = NULL;
5049         int err = 0;
5050
5051         if (kernel_supports(obj, FEAT_PROG_NAME))
5052                 map_name = map->name;
5053         create_attr.map_ifindex = map->map_ifindex;
5054         create_attr.map_flags = def->map_flags;
5055         create_attr.numa_node = map->numa_node;
5056         create_attr.map_extra = map->map_extra;
5057
5058         if (bpf_map__is_struct_ops(map))
5059                 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
5060
5061         if (obj->btf && btf__fd(obj->btf) >= 0) {
5062                 create_attr.btf_fd = btf__fd(obj->btf);
5063                 create_attr.btf_key_type_id = map->btf_key_type_id;
5064                 create_attr.btf_value_type_id = map->btf_value_type_id;
5065         }
5066
5067         if (bpf_map_type__is_map_in_map(def->type)) {
5068                 if (map->inner_map) {
5069                         err = bpf_object__create_map(obj, map->inner_map, true);
5070                         if (err) {
5071                                 pr_warn("map '%s': failed to create inner map: %d\n",
5072                                         map->name, err);
5073                                 return err;
5074                         }
5075                         map->inner_map_fd = bpf_map__fd(map->inner_map);
5076                 }
5077                 if (map->inner_map_fd >= 0)
5078                         create_attr.inner_map_fd = map->inner_map_fd;
5079         }
5080
5081         switch (def->type) {
5082         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
5083         case BPF_MAP_TYPE_CGROUP_ARRAY:
5084         case BPF_MAP_TYPE_STACK_TRACE:
5085         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
5086         case BPF_MAP_TYPE_HASH_OF_MAPS:
5087         case BPF_MAP_TYPE_DEVMAP:
5088         case BPF_MAP_TYPE_DEVMAP_HASH:
5089         case BPF_MAP_TYPE_CPUMAP:
5090         case BPF_MAP_TYPE_XSKMAP:
5091         case BPF_MAP_TYPE_SOCKMAP:
5092         case BPF_MAP_TYPE_SOCKHASH:
5093         case BPF_MAP_TYPE_QUEUE:
5094         case BPF_MAP_TYPE_STACK:
5095                 create_attr.btf_fd = 0;
5096                 create_attr.btf_key_type_id = 0;
5097                 create_attr.btf_value_type_id = 0;
5098                 map->btf_key_type_id = 0;
5099                 map->btf_value_type_id = 0;
5100         default:
5101                 break;
5102         }
5103
5104         if (obj->gen_loader) {
5105                 bpf_gen__map_create(obj->gen_loader, def->type, map_name,
5106                                     def->key_size, def->value_size, def->max_entries,
5107                                     &create_attr, is_inner ? -1 : map - obj->maps);
5108                 /* Pretend to have a valid FD to pass various fd >= 0 checks.
5109                  * This fd == 0 will never be used with any syscall and will be reset to -1 eventually.
5110                  */
5111                 map->fd = 0;
5112         } else {
5113                 map->fd = bpf_map_create(def->type, map_name,
5114                                          def->key_size, def->value_size,
5115                                          def->max_entries, &create_attr);
5116         }
5117         if (map->fd < 0 && (create_attr.btf_key_type_id ||
5118                             create_attr.btf_value_type_id)) {
5119                 char *cp, errmsg[STRERR_BUFSIZE];
5120
5121                 err = -errno;
5122                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5123                 pr_warn("Error in bpf_map_create(%s):%s(%d). Retrying without BTF.\n",
5124                         map->name, cp, err);
5125                 create_attr.btf_fd = 0;
5126                 create_attr.btf_key_type_id = 0;
5127                 create_attr.btf_value_type_id = 0;
5128                 map->btf_key_type_id = 0;
5129                 map->btf_value_type_id = 0;
5130                 map->fd = bpf_map_create(def->type, map_name,
5131                                          def->key_size, def->value_size,
5132                                          def->max_entries, &create_attr);
5133         }
5134
5135         err = map->fd < 0 ? -errno : 0;
5136
5137         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
5138                 if (obj->gen_loader)
5139                         map->inner_map->fd = -1;
5140                 bpf_map__destroy(map->inner_map);
5141                 zfree(&map->inner_map);
5142         }
5143
5144         return err;
5145 }
5146
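/* Populate the declaratively initialized slots of an outer map-in-map with
 * the FDs of the corresponding inner maps.
 */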
5147 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
5148 {
5149         const struct bpf_map *targ_map;
5150         unsigned int i;
5151         int fd, err = 0;
5152
5153         for (i = 0; i < map->init_slots_sz; i++) {
5154                 if (!map->init_slots[i])
5155                         continue;
5156
5157                 targ_map = map->init_slots[i];
5158                 fd = bpf_map__fd(targ_map);
5159
5160                 if (obj->gen_loader) {
5161                         bpf_gen__populate_outer_map(obj->gen_loader,
5162                                                     map - obj->maps, i,
5163                                                     targ_map - obj->maps);
5164                 } else {
5165                         err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5166                 }
5167                 if (err) {
5168                         err = -errno;
5169                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
5170                                 map->name, i, targ_map->name, fd, err);
5171                         return err;
5172                 }
5173                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
5174                          map->name, i, targ_map->name, fd);
5175         }
5176
5177         zfree(&map->init_slots);
5178         map->init_slots_sz = 0;
5179
5180         return 0;
5181 }
5182
5183 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
5184 {
5185         const struct bpf_program *targ_prog;
5186         unsigned int i;
5187         int fd, err;
5188
5189         if (obj->gen_loader)
5190                 return -ENOTSUP;
5191
5192         for (i = 0; i < map->init_slots_sz; i++) {
5193                 if (!map->init_slots[i])
5194                         continue;
5195
5196                 targ_prog = map->init_slots[i];
5197                 fd = bpf_program__fd(targ_prog);
5198
5199                 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
5200                 if (err) {
5201                         err = -errno;
5202                         pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
5203                                 map->name, i, targ_prog->name, fd, err);
5204                         return err;
5205                 }
5206                 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
5207                          map->name, i, targ_prog->name, fd);
5208         }
5209
5210         zfree(&map->init_slots);
5211         map->init_slots_sz = 0;
5212
5213         return 0;
5214 }
5215
5216 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
5217 {
5218         struct bpf_map *map;
5219         int i, err;
5220
5221         for (i = 0; i < obj->nr_maps; i++) {
5222                 map = &obj->maps[i];
5223
5224                 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
5225                         continue;
5226
5227                 err = init_prog_array_slots(obj, map);
5228                 if (err < 0) {
5229                         zclose(map->fd);
5230                         return err;
5231                 }
5232         }
5233         return 0;
5234 }
5235
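/* A PERF_EVENT_ARRAY map with unspecified max_entries defaults to one
 * entry per possible CPU.
 */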
5236 static int map_set_def_max_entries(struct bpf_map *map)
5237 {
5238         if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5239                 int nr_cpus;
5240
5241                 nr_cpus = libbpf_num_possible_cpus();
5242                 if (nr_cpus < 0) {
5243                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5244                                 map->name, nr_cpus);
5245                         return nr_cpus;
5246                 }
5247                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5248                 map->def.max_entries = nr_cpus;
5249         }
5250
5251         return 0;
5252 }
5253
5254 static int
5255 bpf_object__create_maps(struct bpf_object *obj)
5256 {
5257         struct bpf_map *map;
5258         char *cp, errmsg[STRERR_BUFSIZE];
5259         unsigned int i, j;
5260         int err;
5261         bool retried;
5262
5263         for (i = 0; i < obj->nr_maps; i++) {
5264                 map = &obj->maps[i];
5265
5266                 /* To support old kernels, we skip creating global data maps
5267                  * (.rodata, .data, .kconfig, etc); later on, during program
5268                  * loading, if we detect that at least one of the to-be-loaded
5269                  * programs is referencing any global data map, we'll error
5270                  * out with the program name and relocation index logged.
5271                  * This approach accommodates Clang emitting unnecessary
5272                  * .rodata.str1.1 sections for string literals, and it also
5273                  * allows CO-RE applications to use global variables in some
5274                  * BPF programs but not in others. If those global
5275                  * variable-using programs are not loaded at runtime due to
5276                  * bpf_program__set_autoload(prog, false), bpf_object loading
5277                  * will succeed just fine even on old kernels lacking global
5278                  * data support.
5279                  */
5280                 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5281                         map->autocreate = false;
5282
5283                 if (!map->autocreate) {
5284                         pr_debug("map '%s': skipped auto-creating...\n", map->name);
5285                         continue;
5286                 }
5287
5288                 err = map_set_def_max_entries(map);
5289                 if (err)
5290                         goto err_out;
5291
5292                 retried = false;
5293 retry:
5294                 if (map->pin_path) {
5295                         err = bpf_object__reuse_map(map);
5296                         if (err) {
5297                                 pr_warn("map '%s': error reusing pinned map\n",
5298                                         map->name);
5299                                 goto err_out;
5300                         }
5301                         if (retried && map->fd < 0) {
5302                                 pr_warn("map '%s': cannot find pinned map\n",
5303                                         map->name);
5304                                 err = -ENOENT;
5305                                 goto err_out;
5306                         }
5307                 }
5308
5309                 if (map->fd >= 0) {
5310                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5311                                  map->name, map->fd);
5312                 } else {
5313                         err = bpf_object__create_map(obj, map, false);
5314                         if (err)
5315                                 goto err_out;
5316
5317                         pr_debug("map '%s': created successfully, fd=%d\n",
5318                                  map->name, map->fd);
5319
5320                         if (bpf_map__is_internal(map)) {
5321                                 err = bpf_object__populate_internal_map(obj, map);
5322                                 if (err < 0) {
5323                                         zclose(map->fd);
5324                                         goto err_out;
5325                                 }
5326                         }
5327
5328                         if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5329                                 err = init_map_in_map_slots(obj, map);
5330                                 if (err < 0) {
5331                                         zclose(map->fd);
5332                                         goto err_out;
5333                                 }
5334                         }
5335                 }
5336
5337                 if (map->pin_path && !map->pinned) {
5338                         err = bpf_map__pin(map, NULL);
5339                         if (err) {
5340                                 zclose(map->fd);
5341                                 if (!retried && err == -EEXIST) {
5342                                         retried = true;
5343                                         goto retry;
5344                                 }
5345                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
5346                                         map->name, map->pin_path, err);
5347                                 goto err_out;
5348                         }
5349                 }
5350         }
5351
5352         return 0;
5353
5354 err_out:
5355         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5356         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
5357         pr_perm_msg(err);
5358         for (j = 0; j < i; j++)
5359                 zclose(obj->maps[j].fd);
5360         return err;
5361 }
5362
5363 static bool bpf_core_is_flavor_sep(const char *s)
5364 {
5365         /* check X___Y name pattern, where X and Y are not underscores */
5366         return s[0] != '_' &&                                 /* X */
5367                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
5368                s[4] != '_';                                   /* Y */
5369 }
5370
5371 /* Given 'some_struct_name___with_flavor', return the length of the name
5372  * prefix before the last triple underscore ("some_struct_name" here); the
5373  * part after it is ignored by BPF CO-RE relocation during matching.
5374  */
5375 size_t bpf_core_essential_name_len(const char *name)
5376 {
5377         size_t n = strlen(name);
5378         int i;
5379
5380         for (i = n - 5; i >= 0; i--) {
5381                 if (bpf_core_is_flavor_sep(name + i))
5382                         return i + 1;
5383         }
5384         return n;
5385 }
5386
5387 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5388 {
5389         if (!cands)
5390                 return;
5391
5392         free(cands->cands);
5393         free(cands);
5394 }
5395
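/* Scan target BTF starting at type ID 'targ_start_id' and append to 'cands'
 * every type whose kind matches the local candidate's and whose essential
 * name (flavor suffix stripped) equals the local essential name.
 */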
5396 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5397                        size_t local_essent_len,
5398                        const struct btf *targ_btf,
5399                        const char *targ_btf_name,
5400                        int targ_start_id,
5401                        struct bpf_core_cand_list *cands)
5402 {
5403         struct bpf_core_cand *new_cands, *cand;
5404         const struct btf_type *t, *local_t;
5405         const char *targ_name, *local_name;
5406         size_t targ_essent_len;
5407         int n, i;
5408
5409         local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5410         local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5411
5412         n = btf__type_cnt(targ_btf);
5413         for (i = targ_start_id; i < n; i++) {
5414                 t = btf__type_by_id(targ_btf, i);
5415                 if (!btf_kind_core_compat(t, local_t))
5416                         continue;
5417
5418                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5419                 if (str_is_empty(targ_name))
5420                         continue;
5421
5422                 targ_essent_len = bpf_core_essential_name_len(targ_name);
5423                 if (targ_essent_len != local_essent_len)
5424                         continue;
5425
5426                 if (strncmp(local_name, targ_name, local_essent_len) != 0)
5427                         continue;
5428
5429                 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5430                          local_cand->id, btf_kind_str(local_t),
5431                          local_name, i, btf_kind_str(t), targ_name,
5432                          targ_btf_name);
5433                 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5434                                               sizeof(*cands->cands));
5435                 if (!new_cands)
5436                         return -ENOMEM;
5437
5438                 cand = &new_cands[cands->len];
5439                 cand->btf = targ_btf;
5440                 cand->id = i;
5441
5442                 cands->cands = new_cands;
5443                 cands->len++;
5444         }
5445         return 0;
5446 }
5447
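/* Enumerate all BTF objects loaded into the kernel and cache the module
 * ones (kernel BTFs other than "vmlinux") in obj->btf_modules, e.g. for
 * later CO-RE candidate lookups.
 */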
5448 static int load_module_btfs(struct bpf_object *obj)
5449 {
5450         struct bpf_btf_info info;
5451         struct module_btf *mod_btf;
5452         struct btf *btf;
5453         char name[64];
5454         __u32 id = 0, len;
5455         int err, fd;
5456
5457         if (obj->btf_modules_loaded)
5458                 return 0;
5459
5460         if (obj->gen_loader)
5461                 return 0;
5462
5463         /* don't do this again, even if we find no module BTFs */
5464         obj->btf_modules_loaded = true;
5465
5466         /* kernel too old to support module BTFs */
5467         if (!kernel_supports(obj, FEAT_MODULE_BTF))
5468                 return 0;
5469
5470         while (true) {
5471                 err = bpf_btf_get_next_id(id, &id);
5472                 if (err && errno == ENOENT)
5473                         return 0;
5474                 if (err) {
5475                         err = -errno;
5476                         pr_warn("failed to iterate BTF objects: %d\n", err);
5477                         return err;
5478                 }
5479
5480                 fd = bpf_btf_get_fd_by_id(id);
5481                 if (fd < 0) {
5482                         if (errno == ENOENT)
5483                                 continue; /* expected race: BTF was unloaded */
5484                         err = -errno;
5485                         pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5486                         return err;
5487                 }
5488
5489                 len = sizeof(info);
5490                 memset(&info, 0, sizeof(info));
5491                 info.name = ptr_to_u64(name);
5492                 info.name_len = sizeof(name);
5493
5494                 err = bpf_btf_get_info_by_fd(fd, &info, &len);
5495                 if (err) {
5496                         err = -errno;
5497                         pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5498                         goto err_out;
5499                 }
5500
5501                 /* ignore non-module BTFs */
5502                 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5503                         close(fd);
5504                         continue;
5505                 }
5506
5507                 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5508                 err = libbpf_get_error(btf);
5509                 if (err) {
5510                         pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5511                                 name, id, err);
5512                         goto err_out;
5513                 }
5514
5515                 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5516                                         sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5517                 if (err)
5518                         goto err_out;
5519
5520                 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5521
5522                 mod_btf->btf = btf;
5523                 mod_btf->id = id;
5524                 mod_btf->fd = fd;
5525                 mod_btf->name = strdup(name);
5526                 if (!mod_btf->name) {
5527                         err = -ENOMEM;
5528                         goto err_out;
5529                 }
5530                 continue;
5531
5532 err_out:
5533                 close(fd);
5534                 return err;
5535         }
5536
5537         return 0;
5538 }
5539
5540 static struct bpf_core_cand_list *
5541 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5542 {
5543         struct bpf_core_cand local_cand = {};
5544         struct bpf_core_cand_list *cands;
5545         const struct btf *main_btf;
5546         const struct btf_type *local_t;
5547         const char *local_name;
5548         size_t local_essent_len;
5549         int err, i;
5550
5551         local_cand.btf = local_btf;
5552         local_cand.id = local_type_id;
5553         local_t = btf__type_by_id(local_btf, local_type_id);
5554         if (!local_t)
5555                 return ERR_PTR(-EINVAL);
5556
5557         local_name = btf__name_by_offset(local_btf, local_t->name_off);
5558         if (str_is_empty(local_name))
5559                 return ERR_PTR(-EINVAL);
5560         local_essent_len = bpf_core_essential_name_len(local_name);
5561
5562         cands = calloc(1, sizeof(*cands));
5563         if (!cands)
5564                 return ERR_PTR(-ENOMEM);
5565
5566         /* Attempt to find target candidates in vmlinux BTF first */
5567         main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5568         err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5569         if (err)
5570                 goto err_out;
5571
5572         /* if vmlinux BTF has any candidates, don't go looking into module BTFs */
5573         if (cands->len)
5574                 return cands;
5575
5576         /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5577         if (obj->btf_vmlinux_override)
5578                 return cands;
5579
5580         /* now look through module BTFs, still trying to find candidates */
5581         err = load_module_btfs(obj);
5582         if (err)
5583                 goto err_out;
5584
5585         for (i = 0; i < obj->btf_module_cnt; i++) {
5586                 err = bpf_core_add_cands(&local_cand, local_essent_len,
5587                                          obj->btf_modules[i].btf,
5588                                          obj->btf_modules[i].name,
5589                                          btf__type_cnt(obj->btf_vmlinux),
5590                                          cands);
5591                 if (err)
5592                         goto err_out;
5593         }
5594
5595         return cands;
5596 err_out:
5597         bpf_core_free_cands(cands);
5598         return ERR_PTR(err);
5599 }
5600
5601 /* Check local and target types for compatibility. This check is used for
5602  * type-based CO-RE relocations and follows slightly different rules from
5603  * field-based relocations. This function assumes that root types were already
5604  * checked for name match. Beyond that initial root-level name check, names
5605  * are completely ignored. Compatibility rules are as follows:
5606  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5607  *     kind should match for local and target types (i.e., STRUCT is not
5608  *     compatible with UNION);
5609  *   - for ENUMs, the size is ignored;
5610  *   - for INT, size and signedness are ignored;
5611  *   - for ARRAY, dimensionality is ignored, element types are checked for
5612  *     compatibility recursively;
5613  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
5614  *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5615  *   - FUNC_PROTOs are compatible if they have compatible signatures: same
5616  *     number of input args and compatible return and argument types.
5617  * These rules are not set in stone and will probably be adjusted as we get
5618  * more experience with using BPF CO-RE relocations; see the examples below.
5619  */
5620 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5621                               const struct btf *targ_btf, __u32 targ_id)
5622 {
5623         return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5624 }
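
/* Illustrative examples of the above rules (hypothetical types, assuming
 * root-level names already matched):
 *
 *	struct s { int x; }  vs  struct s { long y; }  -> compatible (members ignored)
 *	struct s { ... }     vs  union s { ... }       -> incompatible (kind mismatch)
 *	int                  vs  unsigned long         -> compatible (size/signedness ignored)
 *	int[4]               vs  int[16]               -> compatible (dimensionality ignored)
 *	int (*)(int)         vs  int (*)(int, int)     -> incompatible (arg count differs)
 */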
5625
5626 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5627                          const struct btf *targ_btf, __u32 targ_id)
5628 {
5629         return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5630 }
5631
5632 static size_t bpf_core_hash_fn(const long key, void *ctx)
5633 {
5634         return key;
5635 }
5636
5637 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5638 {
5639         return k1 == k2;
5640 }
5641
5642 static int record_relo_core(struct bpf_program *prog,
5643                             const struct bpf_core_relo *core_relo, int insn_idx)
5644 {
5645         struct reloc_desc *relos, *relo;
5646
5647         relos = libbpf_reallocarray(prog->reloc_desc,
5648                                     prog->nr_reloc + 1, sizeof(*relos));
5649         if (!relos)
5650                 return -ENOMEM;
5651         relo = &relos[prog->nr_reloc];
5652         relo->type = RELO_CORE;
5653         relo->insn_idx = insn_idx;
5654         relo->core_relo = core_relo;
5655         prog->reloc_desc = relos;
5656         prog->nr_reloc++;
5657         return 0;
5658 }
5659
5660 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5661 {
5662         struct reloc_desc *relo;
5663         int i;
5664
5665         for (i = 0; i < prog->nr_reloc; i++) {
5666                 relo = &prog->reloc_desc[i];
5667                 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5668                         continue;
5669
5670                 return relo->core_relo;
5671         }
5672
5673         return NULL;
5674 }
5675
5676 static int bpf_core_resolve_relo(struct bpf_program *prog,
5677                                  const struct bpf_core_relo *relo,
5678                                  int relo_idx,
5679                                  const struct btf *local_btf,
5680                                  struct hashmap *cand_cache,
5681                                  struct bpf_core_relo_res *targ_res)
5682 {
5683         struct bpf_core_spec specs_scratch[3] = {};
5684         struct bpf_core_cand_list *cands = NULL;
5685         const char *prog_name = prog->name;
5686         const struct btf_type *local_type;
5687         const char *local_name;
5688         __u32 local_id = relo->type_id;
5689         int err;
5690
5691         local_type = btf__type_by_id(local_btf, local_id);
5692         if (!local_type)
5693                 return -EINVAL;
5694
5695         local_name = btf__name_by_offset(local_btf, local_type->name_off);
5696         if (!local_name)
5697                 return -EINVAL;
5698
5699         if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5700             !hashmap__find(cand_cache, local_id, &cands)) {
5701                 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5702                 if (IS_ERR(cands)) {
5703                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5704                                 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5705                                 local_name, PTR_ERR(cands));
5706                         return PTR_ERR(cands);
5707                 }
5708                 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
5709                 if (err) {
5710                         bpf_core_free_cands(cands);
5711                         return err;
5712                 }
5713         }
5714
5715         return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5716                                        targ_res);
5717 }
5718
5719 static int
5720 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5721 {
5722         const struct btf_ext_info_sec *sec;
5723         struct bpf_core_relo_res targ_res;
5724         const struct bpf_core_relo *rec;
5725         const struct btf_ext_info *seg;
5726         struct hashmap_entry *entry;
5727         struct hashmap *cand_cache = NULL;
5728         struct bpf_program *prog;
5729         struct bpf_insn *insn;
5730         const char *sec_name;
5731         int i, err = 0, insn_idx, sec_idx, sec_num;
5732
5733         if (obj->btf_ext->core_relo_info.len == 0)
5734                 return 0;
5735
5736         if (targ_btf_path) {
5737                 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5738                 err = libbpf_get_error(obj->btf_vmlinux_override);
5739                 if (err) {
5740                         pr_warn("failed to parse target BTF: %d\n", err);
5741                         return err;
5742                 }
5743         }
5744
5745         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5746         if (IS_ERR(cand_cache)) {
5747                 err = PTR_ERR(cand_cache);
5748                 goto out;
5749         }
5750
5751         seg = &obj->btf_ext->core_relo_info;
5752         sec_num = 0;
5753         for_each_btf_ext_sec(seg, sec) {
5754                 sec_idx = seg->sec_idxs[sec_num];
5755                 sec_num++;
5756
5757                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5758                 if (str_is_empty(sec_name)) {
5759                         err = -EINVAL;
5760                         goto out;
5761                 }
5762
5763                 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5764
5765                 for_each_btf_ext_rec(seg, sec, i, rec) {
5766                         if (rec->insn_off % BPF_INSN_SZ)
5767                                 return -EINVAL;
5768                         insn_idx = rec->insn_off / BPF_INSN_SZ;
5769                         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5770                         if (!prog) {
5771                                 /* When __weak subprog is "overridden" by another instance
5772                                  * of the subprog from a different object file, linker still
5773                                  * appends all the .BTF.ext info that used to belong to that
5774                                  * eliminated subprogram.
5775                                  * This is similar to what x86-64 linker does for relocations.
5776                                  * So just ignore such relocations just like we ignore
5777                                  * subprog instructions when discovering subprograms.
5778                                  */
5779                                 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5780                                          sec_name, i, insn_idx);
5781                                 continue;
5782                         }
5783                         /* no need to apply CO-RE relocation if the program is
5784                          * not going to be loaded
5785                          */
5786                         if (!prog->autoload)
5787                                 continue;
5788
5789                         /* adjust insn_idx from section frame of reference to the local
5790                          * program's frame of reference; (sub-)program code is not yet
5791                          * relocated, so it's enough to just subtract in-section offset
5792                          */
5793                         insn_idx = insn_idx - prog->sec_insn_off;
5794                         if (insn_idx >= prog->insns_cnt)
5795                                 return -EINVAL;
5796                         insn = &prog->insns[insn_idx];
5797
5798                         err = record_relo_core(prog, rec, insn_idx);
5799                         if (err) {
5800                                 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5801                                         prog->name, i, err);
5802                                 goto out;
5803                         }
5804
5805                         if (prog->obj->gen_loader)
5806                                 continue;
5807
5808                         err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5809                         if (err) {
5810                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5811                                         prog->name, i, err);
5812                                 goto out;
5813                         }
5814
5815                         err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5816                         if (err) {
5817                                 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5818                                         prog->name, i, insn_idx, err);
5819                                 goto out;
5820                         }
5821                 }
5822         }
5823
5824 out:
5825         /* obj->btf_vmlinux and module BTFs are freed after object load */
5826         btf__free(obj->btf_vmlinux_override);
5827         obj->btf_vmlinux_override = NULL;
5828
5829         if (!IS_ERR_OR_NULL(cand_cache)) {
5830                 hashmap__for_each_entry(cand_cache, entry, i) {
5831                         bpf_core_free_cands(entry->pvalue);
5832                 }
5833                 hashmap__free(cand_cache);
5834         }
5835         return err;
5836 }
5837
5838 /* base map load ldimm64 special constant, used also for log fixup logic */
5839 #define POISON_LDIMM64_MAP_BASE 2001000000
5840 #define POISON_LDIMM64_MAP_PFX "200100"
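
/* For example (illustrative), poisoning the ldimm64 for map #123 sets
 * insn->imm to 2001000000 + 123 == 2001000123; the resulting verifier
 * message "invalid func unknown#2001000123" is later recognized by the log
 * fixup logic via the "200100" prefix and mapped back to the offending map.
 */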
5841
5842 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
5843                                int insn_idx, struct bpf_insn *insn,
5844                                int map_idx, const struct bpf_map *map)
5845 {
5846         int i;
5847
5848         pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
5849                  prog->name, relo_idx, insn_idx, map_idx, map->name);
5850
5851         /* we turn a single ldimm64 into two identical invalid calls */
5852         for (i = 0; i < 2; i++) {
5853                 insn->code = BPF_JMP | BPF_CALL;
5854                 insn->dst_reg = 0;
5855                 insn->src_reg = 0;
5856                 insn->off = 0;
5857                 /* if this instruction is reachable (not dead code), the
5858                  * verifier will complain with something like:
5859                  * invalid func unknown#2001000123
5860                  * where the lower 123 is the map index into obj->maps[] array
5861                  */
5862                 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
5863
5864                 insn++;
5865         }
5866 }
5867
5868 /* unresolved kfunc call special constant, used also for log fixup logic */
5869 #define POISON_CALL_KFUNC_BASE 2002000000
5870 #define POISON_CALL_KFUNC_PFX "2002"
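
/* For example (illustrative), an unresolved kfunc call for extern #45 gets
 * imm 2002000000 + 45 == 2002000045 and shows up in the verifier log as
 * "invalid func unknown#2002000045"; the log fixup logic keys off the
 * "2002" prefix.
 */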
5871
5872 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
5873                               int insn_idx, struct bpf_insn *insn,
5874                               int ext_idx, const struct extern_desc *ext)
5875 {
5876         pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
5877                  prog->name, relo_idx, insn_idx, ext->name);
5878
5879         /* we turn kfunc call into invalid helper call with identifiable constant */
5880         insn->code = BPF_JMP | BPF_CALL;
5881         insn->dst_reg = 0;
5882         insn->src_reg = 0;
5883         insn->off = 0;
5884         /* if this instruction is reachable (not dead code), the
5885          * verifier will complain with something like:
5886          * invalid func unknown#2002000123
5887          * where the lower 123 is the extern index into obj->externs[] array
5888          */
5889         insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
5890 }
5891
5892 /* Relocate data references within program code:
5893  *  - map references;
5894  *  - global variable references;
5895  *  - extern references.
5896  */
5897 static int
5898 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5899 {
5900         int i;
5901
5902         for (i = 0; i < prog->nr_reloc; i++) {
5903                 struct reloc_desc *relo = &prog->reloc_desc[i];
5904                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5905                 const struct bpf_map *map;
5906                 struct extern_desc *ext;
5907
5908                 switch (relo->type) {
5909                 case RELO_LD64:
5910                         map = &obj->maps[relo->map_idx];
5911                         if (obj->gen_loader) {
5912                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
5913                                 insn[0].imm = relo->map_idx;
5914                         } else if (map->autocreate) {
5915                                 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5916                                 insn[0].imm = map->fd;
5917                         } else {
5918                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5919                                                    relo->map_idx, map);
5920                         }
5921                         break;
5922                 case RELO_DATA:
5923                         map = &obj->maps[relo->map_idx];
5924                         insn[1].imm = insn[0].imm + relo->sym_off;
5925                         if (obj->gen_loader) {
5926                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5927                                 insn[0].imm = relo->map_idx;
5928                         } else if (map->autocreate) {
5929                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5930                                 insn[0].imm = map->fd;
5931                         } else {
5932                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5933                                                    relo->map_idx, map);
5934                         }
5935                         break;
5936                 case RELO_EXTERN_LD64:
5937                         ext = &obj->externs[relo->ext_idx];
5938                         if (ext->type == EXT_KCFG) {
5939                                 if (obj->gen_loader) {
5940                                         insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5941                                         insn[0].imm = obj->kconfig_map_idx;
5942                                 } else {
5943                                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5944                                         insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5945                                 }
5946                                 insn[1].imm = ext->kcfg.data_off;
5947                         } else /* EXT_KSYM */ {
5948                                 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
5949                                         insn[0].src_reg = BPF_PSEUDO_BTF_ID;
5950                                         insn[0].imm = ext->ksym.kernel_btf_id;
5951                                         insn[1].imm = ext->ksym.kernel_btf_obj_fd;
5952                                 } else { /* typeless ksyms or unresolved typed ksyms */
5953                                         insn[0].imm = (__u32)ext->ksym.addr;
5954                                         insn[1].imm = ext->ksym.addr >> 32;
5955                                 }
5956                         }
5957                         break;
5958                 case RELO_EXTERN_CALL:
5959                         ext = &obj->externs[relo->ext_idx];
5960                         insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
5961                         if (ext->is_set) {
5962                                 insn[0].imm = ext->ksym.kernel_btf_id;
5963                                 insn[0].off = ext->ksym.btf_fd_idx;
5964                         } else { /* unresolved weak kfunc call */
5965                                 poison_kfunc_call(prog, i, relo->insn_idx, insn,
5966                                                   relo->ext_idx, ext);
5967                         }
5968                         break;
5969                 case RELO_SUBPROG_ADDR:
5970                         if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
5971                                 pr_warn("prog '%s': relo #%d: bad insn\n",
5972                                         prog->name, i);
5973                                 return -EINVAL;
5974                         }
5975                         /* handled already */
5976                         break;
5977                 case RELO_CALL:
5978                         /* handled already */
5979                         break;
5980                 case RELO_CORE:
5981                         /* will be handled by bpf_program_record_relos() */
5982                         break;
5983                 default:
5984                         pr_warn("prog '%s': relo #%d: bad relo type %d\n",
5985                                 prog->name, i, relo->type);
5986                         return -EINVAL;
5987                 }
5988         }
5989
5990         return 0;
5991 }
5992
5993 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
5994                                     const struct bpf_program *prog,
5995                                     const struct btf_ext_info *ext_info,
5996                                     void **prog_info, __u32 *prog_rec_cnt,
5997                                     __u32 *prog_rec_sz)
5998 {
5999         void *copy_start = NULL, *copy_end = NULL;
6000         void *rec, *rec_end, *new_prog_info;
6001         const struct btf_ext_info_sec *sec;
6002         size_t old_sz, new_sz;
6003         int i, sec_num, sec_idx, off_adj;
6004
6005         sec_num = 0;
6006         for_each_btf_ext_sec(ext_info, sec) {
6007                 sec_idx = ext_info->sec_idxs[sec_num];
6008                 sec_num++;
6009                 if (prog->sec_idx != sec_idx)
6010                         continue;
6011
6012                 for_each_btf_ext_rec(ext_info, sec, i, rec) {
6013                         __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
6014
6015                         if (insn_off < prog->sec_insn_off)
6016                                 continue;
6017                         if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
6018                                 break;
6019
6020                         if (!copy_start)
6021                                 copy_start = rec;
6022                         copy_end = rec + ext_info->rec_size;
6023                 }
6024
6025                 if (!copy_start)
6026                         return -ENOENT;
6027
6028                 /* append func/line info of a given (sub-)program to the main
6029                  * program's func/line info
6030                  */
6031                 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6032                 new_sz = old_sz + (copy_end - copy_start);
6033                 new_prog_info = realloc(*prog_info, new_sz);
6034                 if (!new_prog_info)
6035                         return -ENOMEM;
6036                 *prog_info = new_prog_info;
6037                 *prog_rec_cnt = new_sz / ext_info->rec_size;
6038                 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6039
6040                 /* Kernel instruction offsets are in units of 8-byte
6041                  * instructions, while .BTF.ext instruction offsets generated
6042                  * by Clang are in units of bytes. So convert Clang offsets
6043                  * into kernel offsets and adjust offset according to program
6044                  * relocated position.
6045                  */
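                /* E.g. (illustrative): a record with byte offset 96 is insn
                 * #12 within its section; if this subprog starts at section
                 * insn #10 and was appended at main prog insn #100, then
                 * off_adj == 90 and the final kernel offset is 12 + 90 == 102.
                 */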
6046                 off_adj = prog->sub_insn_off - prog->sec_insn_off;
6047                 rec = new_prog_info + old_sz;
6048                 rec_end = new_prog_info + new_sz;
6049                 for (; rec < rec_end; rec += ext_info->rec_size) {
6050                         __u32 *insn_off = rec;
6051
6052                         *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6053                 }
6054                 *prog_rec_sz = ext_info->rec_size;
6055                 return 0;
6056         }
6057
6058         return -ENOENT;
6059 }
6060
6061 static int
6062 reloc_prog_func_and_line_info(const struct bpf_object *obj,
6063                               struct bpf_program *main_prog,
6064                               const struct bpf_program *prog)
6065 {
6066         int err;
6067
6068         /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6069          * support func/line info
6070          */
6071         if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
6072                 return 0;
6073
6074         /* only attempt func info relocation if main program's func_info
6075          * relocation was successful
6076          */
6077         if (main_prog != prog && !main_prog->func_info)
6078                 goto line_info;
6079
6080         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6081                                        &main_prog->func_info,
6082                                        &main_prog->func_info_cnt,
6083                                        &main_prog->func_info_rec_size);
6084         if (err) {
6085                 if (err != -ENOENT) {
6086                         pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
6087                                 prog->name, err);
6088                         return err;
6089                 }
6090                 if (main_prog->func_info) {
6091                         /*
6092                          * Some info has already been found, but the last
6093                          * btf_ext reloc failed, so we must error out.
6094                          */
6095                         pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6096                         return err;
6097                 }
6098                 /* Failed to relocate the very first info; ignore the rest. */
6099                 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6100                         prog->name);
6101         }
6102
6103 line_info:
6104         /* don't relocate line info if main program's relocation failed */
6105         if (main_prog != prog && !main_prog->line_info)
6106                 return 0;
6107
6108         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6109                                        &main_prog->line_info,
6110                                        &main_prog->line_info_cnt,
6111                                        &main_prog->line_info_rec_size);
6112         if (err) {
6113                 if (err != -ENOENT) {
6114                         pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6115                                 prog->name, err);
6116                         return err;
6117                 }
6118                 if (main_prog->line_info) {
6119                         /*
6120                          * Some info has already been found but has problem
6121                          * Some info has already been found, but the last
6122                          * btf_ext reloc failed, so we must error out.
6123                         pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6124                         return err;
6125                 }
6126                 /* Failed to relocate the very first info; ignore the rest. */
6127                 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6128                         prog->name);
6129         }
6130         return 0;
6131 }
6132
6133 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6134 {
6135         size_t insn_idx = *(const size_t *)key;
6136         const struct reloc_desc *relo = elem;
6137
6138         if (insn_idx == relo->insn_idx)
6139                 return 0;
6140         return insn_idx < relo->insn_idx ? -1 : 1;
6141 }
6142
6143 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6144 {
6145         if (!prog->nr_reloc)
6146                 return NULL;
6147         return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6148                        sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6149 }
6150
6151 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6152 {
6153         int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6154         struct reloc_desc *relos;
6155         int i;
6156
6157         if (main_prog == subprog)
6158                 return 0;
6159         relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6160         if (!relos)
6161                 return -ENOMEM;
6162         if (subprog->nr_reloc)
6163                 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6164                        sizeof(*relos) * subprog->nr_reloc);
6165
6166         for (i = main_prog->nr_reloc; i < new_cnt; i++)
6167                 relos[i].insn_idx += subprog->sub_insn_off;
6168         /* After insn_idx adjustment the 'relos' array is still sorted
6169          * by insn_idx and doesn't break bsearch.
6170          */
6171         main_prog->reloc_desc = relos;
6172         main_prog->nr_reloc = new_cnt;
6173         return 0;
6174 }
6175
6176 static int
6177 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6178                        struct bpf_program *prog)
6179 {
6180         size_t sub_insn_idx, insn_idx, new_cnt;
6181         struct bpf_program *subprog;
6182         struct bpf_insn *insns, *insn;
6183         struct reloc_desc *relo;
6184         int err;
6185
6186         err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6187         if (err)
6188                 return err;
6189
6190         for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6191                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6192                 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6193                         continue;
6194
6195                 relo = find_prog_insn_relo(prog, insn_idx);
6196                 if (relo && relo->type == RELO_EXTERN_CALL)
6197                         /* kfunc relocations will be handled later
6198                          * in bpf_object__relocate_data()
6199                          */
6200                         continue;
6201                 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6202                         pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6203                                 prog->name, insn_idx, relo->type);
6204                         return -LIBBPF_ERRNO__RELOC;
6205                 }
6206                 if (relo) {
6207                         /* sub-program instruction index is a combination of
6208                          * the offset of the symbol pointed to by the relocation
6209                          * and the call instruction's imm field; for global
6210                          * functions, the call always has imm = -1, but for static
6211                          * functions the relocation is against STT_SECTION and
6212                          * insn->imm points to the start of the static function
6213                          *
6214                          * for subprog addr relocation, relo->sym_off + insn->imm
6215                          * is the byte offset in the corresponding section.
6216                          */
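                        /* E.g. (illustrative): a call to a global function
                         * located at byte offset 64 within .text has
                         * relo->sym_off == 64 and insn->imm == -1, giving
                         * sub_insn_idx == 64 / 8 - 1 + 1 == 8, the subprog's
                         * first instruction within .text.
                         */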
6217                         if (relo->type == RELO_CALL)
6218                                 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6219                         else
6220                                 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6221                 } else if (insn_is_pseudo_func(insn)) {
6222                         /*
6223                          * RELO_SUBPROG_ADDR relo is always emitted even if both
6224                          * functions are in the same section, so it shouldn't reach here.
6225                          */
6226                         pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6227                                 prog->name, insn_idx);
6228                         return -LIBBPF_ERRNO__RELOC;
6229                 } else {
6230                         /* if subprogram call is to a static function within
6231                          * the same ELF section, there won't be any relocation
6232                          * emitted, but it also means there is no additional
6233                          * offset necessary, insns->imm is relative to
6234                          * instruction's original position within the section
6235                          */
6236                         sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6237                 }
6238
6239                 /* we enforce that sub-programs should be in .text section */
6240                 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6241                 if (!subprog) {
6242                         pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6243                                 prog->name);
6244                         return -LIBBPF_ERRNO__RELOC;
6245                 }
6246
6247                 /* if it's the first call instruction calling into this
6248                  * subprogram (meaning this subprog hasn't been processed
6249                  * yet) within the context of current main program:
6250                  *   - append it at the end of main program's instruction block;
6251                  *   - process it recursively, while current program is put on hold;
6252                  *   - if that subprogram calls some other not yet processed
6253                  *   subprogram, the same thing happens recursively until
6254                  *   there are no more unprocessed subprograms left to append
6255                  *   and relocate.
6256                  */
6257                 if (subprog->sub_insn_off == 0) {
6258                         subprog->sub_insn_off = main_prog->insns_cnt;
6259
6260                         new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6261                         insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6262                         if (!insns) {
6263                                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6264                                 return -ENOMEM;
6265                         }
6266                         main_prog->insns = insns;
6267                         main_prog->insns_cnt = new_cnt;
6268
6269                         memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6270                                subprog->insns_cnt * sizeof(*insns));
6271
6272                         pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6273                                  main_prog->name, subprog->insns_cnt, subprog->name);
6274
6275                         /* The subprog insns are now appended. Append its relos too. */
6276                         err = append_subprog_relos(main_prog, subprog);
6277                         if (err)
6278                                 return err;
6279                         err = bpf_object__reloc_code(obj, main_prog, subprog);
6280                         if (err)
6281                                 return err;
6282                 }
6283
6284                 /* main_prog->insns memory could have been re-allocated, so
6285                  * calculate pointer again
6286                  */
6287                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6288                 /* calculate correct instruction position within current main
6289                  * prog; each main prog can have a different set of
6290                  * subprograms appended (potentially in different order as
6291                  * well), so position of any subprog can be different for
6292                  * different main programs
6293                  */
6294                 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6295
6296                 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6297                          prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6298         }
6299
6300         return 0;
6301 }
6302
6303 /*
6304  * Relocate sub-program calls.
6305  *
6306  * The algorithm operates as follows. Each entry-point BPF program (referred
6307  * to as main prog) is processed separately. Each subprog (a non-entry
6308  * function, which can be called from either entry progs or other subprogs)
6309  * gets its sub_insn_off reset to zero. This serves as an indicator that this
6310  * subprogram hasn't yet been appended and relocated within the current main
6311  * prog. Once it's relocated, sub_insn_off will point at the position within
6312  * the current main prog where the given subprog was appended. This is further
6313  * used to relocate all the call instructions jumping into this subprog.
6314  *
6315  * We start with the main program and process all call instructions. If the
6316  * call is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6317  * is zero), the subprog's instructions are appended at the end of the main
6318  * program's instruction array. Then the main program is "put on hold" while we
6319  * recursively process the newly appended subprogram. If that subprogram calls
6320  * into another subprogram that hasn't been appended, the new subprogram is also
6321  * appended to the *main* prog's instructions (a subprog's own instructions are
6322  * always left untouched, as they need to be in an unmodified state for
6323  * subsequent main progs, and subprog instructions are always sent to the kernel
6324  * only as part of a main prog) and the process continues recursively. Once all
6325  * the subprogs called from a main prog or any of its subprogs are appended (and
6326  * relocated), all their positions within the finalized instruction array are
6327  * known, so it's easy to rewrite the call instructions with correct relative
6328  * offsets corresponding to the desired target subprog.
6329  *
6330  * It's important to realize that some subprogs might not be called from a
6331  * given main prog or any of its called/used subprogs. Those will keep their
6332  * subprog->sub_insn_off as zero at all times, won't be appended to the
6333  * current main prog, and won't be relocated within the context of the current
6334  * main prog. They might still be used from other main progs later.
6335  *
6336  * Visually this process can be shown as follows. Suppose we have two main
6337  * programs, mainA and mainB, and the BPF object contains three subprogs: subA,
6338  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6339  * subC both call subB:
6340  *
6341  *        +--------+ +-------+
6342  *        |        v v       |
6343  *     +--+---+ +--+-+-+ +---+--+
6344  *     | subA | | subB | | subC |
6345  *     +--+---+ +------+ +---+--+
6346  *        ^                  ^
6347  *        |                  |
6348  *    +---+-------+   +------+----+
6349  *    |   mainA   |   |   mainB   |
6350  *    +-----------+   +-----------+
6351  *
6352  * We'll start relocating mainA, will find subA, append it and start
6353  * processing subA recursively:
6354  *
6355  *    +-----------+------+
6356  *    |   mainA   | subA |
6357  *    +-----------+------+
6358  *
6359  * At this point we notice that subB is used by subA, so we append it and
6360  * relocate it (there are no further subcalls from subB):
6361  *
6362  *    +-----------+------+------+
6363  *    |   mainA   | subA | subB |
6364  *    +-----------+------+------+
6365  *
6366  * At this point, we relocate subA's calls, then go one level up and finish
6367  * with relocating mainA's calls. mainA is done.
6368  *
6369  * For mainB, the process is similar but results in a different order. We
6370  * start with mainB and skip subA and subB, as mainB never calls them (at
6371  * least directly), but we see subC is needed, so we append and process it:
6372  *
6373  *    +-----------+------+
6374  *    |   mainB   | subC |
6375  *    +-----------+------+
6376  * Now we see that subC needs subB, so we append subB and relocate it:
6377  *
6378  *    +-----------+------+------+
6379  *    |   mainB   | subC | subB |
6380  *    +-----------+------+------+
6381  *
6382  * At this point we unwind the recursion, relocate calls in subC, then in mainB.
6383  */
6384 static int
6385 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6386 {
6387         struct bpf_program *subprog;
6388         int i, err;
6389
6390         /* mark all subprogs as not relocated (yet) within the context of
6391          * current main program
6392          */
6393         for (i = 0; i < obj->nr_programs; i++) {
6394                 subprog = &obj->programs[i];
6395                 if (!prog_is_subprog(obj, subprog))
6396                         continue;
6397
6398                 subprog->sub_insn_off = 0;
6399         }
6400
6401         err = bpf_object__reloc_code(obj, prog, prog);
6402         if (err)
6403                 return err;
6404
6405         return 0;
6406 }
6407
6408 static void
6409 bpf_object__free_relocs(struct bpf_object *obj)
6410 {
6411         struct bpf_program *prog;
6412         int i;
6413
6414         /* free up relocation descriptors */
6415         for (i = 0; i < obj->nr_programs; i++) {
6416                 prog = &obj->programs[i];
6417                 zfree(&prog->reloc_desc);
6418                 prog->nr_reloc = 0;
6419         }
6420 }
6421
6422 static int cmp_relocs(const void *_a, const void *_b)
6423 {
6424         const struct reloc_desc *a = _a;
6425         const struct reloc_desc *b = _b;
6426
6427         if (a->insn_idx != b->insn_idx)
6428                 return a->insn_idx < b->insn_idx ? -1 : 1;
6429
6430         /* no two relocations should have the same insn_idx, but ... */
6431         if (a->type != b->type)
6432                 return a->type < b->type ? -1 : 1;
6433
6434         return 0;
6435 }
6436
6437 static void bpf_object__sort_relos(struct bpf_object *obj)
6438 {
6439         int i;
6440
6441         for (i = 0; i < obj->nr_programs; i++) {
6442                 struct bpf_program *p = &obj->programs[i];
6443
6444                 if (!p->nr_reloc)
6445                         continue;
6446
6447                 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6448         }
6449 }
6450
6451 static int
6452 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6453 {
6454         struct bpf_program *prog;
6455         size_t i, j;
6456         int err;
6457
6458         if (obj->btf_ext) {
6459                 err = bpf_object__relocate_core(obj, targ_btf_path);
6460                 if (err) {
6461                         pr_warn("failed to perform CO-RE relocations: %d\n",
6462                                 err);
6463                         return err;
6464                 }
6465                 bpf_object__sort_relos(obj);
6466         }
6467
6468         /* Before relocating calls, pre-process relocations and mark the
6469          * few ld_imm64 instructions that point to subprogs.
6470          * Otherwise bpf_object__reloc_code() later would have to consider
6471          * all ld_imm64 insns as relocation candidates. That would
6472          * reduce relocation speed, since the number of find_prog_insn_relo()
6473          * calls would increase and most of them would fail to find a relo.
6474          */
6475         for (i = 0; i < obj->nr_programs; i++) {
6476                 prog = &obj->programs[i];
6477                 for (j = 0; j < prog->nr_reloc; j++) {
6478                         struct reloc_desc *relo = &prog->reloc_desc[j];
6479                         struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6480
6481                         /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6482                         if (relo->type == RELO_SUBPROG_ADDR)
6483                                 insn[0].src_reg = BPF_PSEUDO_FUNC;
6484                 }
6485         }
6486
6487         /* relocate subprogram calls and append used subprograms to main
6488          * programs; each copy of subprogram code needs to be relocated
6489          * differently for each main program, because its code location might
6490          * have changed.
6491          * Append subprog relos to main programs to allow data relos to be
6492          * processed after text is completely relocated.
6493          */
6494         for (i = 0; i < obj->nr_programs; i++) {
6495                 prog = &obj->programs[i];
6496                 /* sub-program's sub-calls are relocated within the context of
6497                  * its main program only
6498                  */
6499                 if (prog_is_subprog(obj, prog))
6500                         continue;
6501                 if (!prog->autoload)
6502                         continue;
6503
6504                 err = bpf_object__relocate_calls(obj, prog);
6505                 if (err) {
6506                         pr_warn("prog '%s': failed to relocate calls: %d\n",
6507                                 prog->name, err);
6508                         return err;
6509                 }
6510         }
6511         /* Process data relos for main programs */
6512         for (i = 0; i < obj->nr_programs; i++) {
6513                 prog = &obj->programs[i];
6514                 if (prog_is_subprog(obj, prog))
6515                         continue;
6516                 if (!prog->autoload)
6517                         continue;
6518                 err = bpf_object__relocate_data(obj, prog);
6519                 if (err) {
6520                         pr_warn("prog '%s': failed to relocate data references: %d\n",
6521                                 prog->name, err);
6522                         return err;
6523                 }
6524         }
6525
6526         return 0;
6527 }
6528
6529 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6530                                             Elf64_Shdr *shdr, Elf_Data *data);
6531
6532 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6533                                          Elf64_Shdr *shdr, Elf_Data *data)
6534 {
6535         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6536         int i, j, nrels, new_sz;
6537         const struct btf_var_secinfo *vi = NULL;
6538         const struct btf_type *sec, *var, *def;
6539         struct bpf_map *map = NULL, *targ_map = NULL;
6540         struct bpf_program *targ_prog = NULL;
6541         bool is_prog_array, is_map_in_map;
6542         const struct btf_member *member;
6543         const char *name, *mname, *type;
6544         unsigned int moff;
6545         Elf64_Sym *sym;
6546         Elf64_Rel *rel;
6547         void *tmp;
6548
6549         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6550                 return -EINVAL;
6551         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6552         if (!sec)
6553                 return -EINVAL;
6554
6555         nrels = shdr->sh_size / shdr->sh_entsize;
6556         for (i = 0; i < nrels; i++) {
6557                 rel = elf_rel_by_idx(data, i);
6558                 if (!rel) {
6559                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6560                         return -LIBBPF_ERRNO__FORMAT;
6561                 }
6562
6563                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
6564                 if (!sym) {
6565                         pr_warn(".maps relo #%d: symbol %zx not found\n",
6566                                 i, (size_t)ELF64_R_SYM(rel->r_info));
6567                         return -LIBBPF_ERRNO__FORMAT;
6568                 }
6569                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
6570
6571                 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
6572                          i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
6573                          (size_t)rel->r_offset, sym->st_name, name);
6574
6575                 for (j = 0; j < obj->nr_maps; j++) {
6576                         map = &obj->maps[j];
6577                         if (map->sec_idx != obj->efile.btf_maps_shndx)
6578                                 continue;
6579
6580                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
6581                         if (vi->offset <= rel->r_offset &&
6582                             rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6583                                 break;
6584                 }
6585                 if (j == obj->nr_maps) {
6586                         pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
6587                                 i, name, (size_t)rel->r_offset);
6588                         return -EINVAL;
6589                 }
6590
6591                 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
6592                 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
6593                 type = is_map_in_map ? "map" : "prog";
6594                 if (is_map_in_map) {
6595                         if (sym->st_shndx != obj->efile.btf_maps_shndx) {
6596                                 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6597                                         i, name);
6598                                 return -LIBBPF_ERRNO__RELOC;
6599                         }
6600                         if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6601                             map->def.key_size != sizeof(int)) {
6602                                 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6603                                         i, map->name, sizeof(int));
6604                                 return -EINVAL;
6605                         }
6606                         targ_map = bpf_object__find_map_by_name(obj, name);
6607                         if (!targ_map) {
6608                                 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
6609                                         i, name);
6610                                 return -ESRCH;
6611                         }
6612                 } else if (is_prog_array) {
6613                         targ_prog = bpf_object__find_program_by_name(obj, name);
6614                         if (!targ_prog) {
6615                                 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
6616                                         i, name);
6617                                 return -ESRCH;
6618                         }
6619                         if (targ_prog->sec_idx != sym->st_shndx ||
6620                             targ_prog->sec_insn_off * 8 != sym->st_value ||
6621                             prog_is_subprog(obj, targ_prog)) {
6622                                 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
6623                                         i, name);
6624                                 return -LIBBPF_ERRNO__RELOC;
6625                         }
6626                 } else {
6627                         return -EINVAL;
6628                 }
6629
6630                 var = btf__type_by_id(obj->btf, vi->type);
6631                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6632                 if (btf_vlen(def) == 0)
6633                         return -EINVAL;
6634                 member = btf_members(def) + btf_vlen(def) - 1;
6635                 mname = btf__name_by_offset(obj->btf, member->name_off);
6636                 if (strcmp(mname, "values"))
6637                         return -EINVAL;
6638
6639                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6640                 if (rel->r_offset - vi->offset < moff)
6641                         return -EINVAL;
6642
6643                 moff = rel->r_offset - vi->offset - moff;
6644                 /* here we use BPF pointer size, which is always 64 bit, as we
6645                  * are parsing an ELF file that was built for the BPF target
6646                  */
6647                 if (moff % bpf_ptr_sz)
6648                         return -EINVAL;
6649                 moff /= bpf_ptr_sz;
6650                 if (moff >= map->init_slots_sz) {
6651                         new_sz = moff + 1;
6652                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6653                         if (!tmp)
6654                                 return -ENOMEM;
6655                         map->init_slots = tmp;
6656                         memset(map->init_slots + map->init_slots_sz, 0,
6657                                (new_sz - map->init_slots_sz) * host_ptr_sz);
6658                         map->init_slots_sz = new_sz;
6659                 }
6660                 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
6661
6662                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
6663                          i, map->name, moff, type, name);
6664         }
6665
6666         return 0;
6667 }
6668
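/* For reference, a minimal BPF-side sketch (not part of libbpf; map and type
 * names are illustrative) of the BTF-defined map-in-map declaration whose
 * "values" initializers produce the .maps relocations handled above:
 *
 *	struct inner_map {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, int);
 *	} inner_map1 SEC(".maps");
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 4);
 *		__type(key, int);
 *		__array(values, struct inner_map);
 *	} outer_map SEC(".maps") = {
 *		.values = { [0] = &inner_map1 },
 *	};
 *
 * Each &inner_map1 reference is an ELF relocation against the .maps section;
 * the slot index is recovered from the relocation offset (divided by the
 * 8-byte BPF pointer size) and stashed in map->init_slots[] for use during
 * map creation.
 */
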
6669 static int bpf_object__collect_relos(struct bpf_object *obj)
6670 {
6671         int i, err;
6672
6673         for (i = 0; i < obj->efile.sec_cnt; i++) {
6674                 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
6675                 Elf64_Shdr *shdr;
6676                 Elf_Data *data;
6677                 int idx;
6678
6679                 if (sec_desc->sec_type != SEC_RELO)
6680                         continue;
6681
6682                 shdr = sec_desc->shdr;
6683                 data = sec_desc->data;
6684                 idx = shdr->sh_info;
6685
6686                 if (shdr->sh_type != SHT_REL) {
6687                         pr_warn("internal error at %d\n", __LINE__);
6688                         return -LIBBPF_ERRNO__INTERNAL;
6689                 }
6690
6691                 if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx)
6692                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6693                 else if (idx == obj->efile.btf_maps_shndx)
6694                         err = bpf_object__collect_map_relos(obj, shdr, data);
6695                 else
6696                         err = bpf_object__collect_prog_relos(obj, shdr, data);
6697                 if (err)
6698                         return err;
6699         }
6700
6701         bpf_object__sort_relos(obj);
6702         return 0;
6703 }
6704
6705 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6706 {
6707         if (BPF_CLASS(insn->code) == BPF_JMP &&
6708             BPF_OP(insn->code) == BPF_CALL &&
6709             BPF_SRC(insn->code) == BPF_K &&
6710             insn->src_reg == 0 &&
6711             insn->dst_reg == 0) {
6712                 *func_id = insn->imm;
6713                 return true;
6714         }
6715         return false;
6716 }
6717
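/* For reference, the ELF-level encoding matched above; a plain helper call
 * instruction looks like this (a sketch using a designated initializer, with
 * an arbitrary helper ID for illustration):
 *
 *	struct bpf_insn call = {
 *		.code    = BPF_JMP | BPF_CALL,
 *		.dst_reg = 0,
 *		.src_reg = 0,	// 0 = plain helper call
 *		.off     = 0,
 *		.imm     = BPF_FUNC_probe_read_kernel,
 *	};
 *
 * Subprog calls (src_reg == BPF_PSEUDO_CALL) and kfunc calls
 * (src_reg == BPF_PSEUDO_KFUNC_CALL) intentionally don't match.
 */
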
6718 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
6719 {
6720         struct bpf_insn *insn = prog->insns;
6721         enum bpf_func_id func_id;
6722         int i;
6723
6724         if (obj->gen_loader)
6725                 return 0;
6726
6727         for (i = 0; i < prog->insns_cnt; i++, insn++) {
6728                 if (!insn_is_helper_call(insn, &func_id))
6729                         continue;
6730
6731                 /* on kernels that don't yet support
6732                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6733                  * to bpf_probe_read(), which old kernels do support
6734                  */
6735                 switch (func_id) {
6736                 case BPF_FUNC_probe_read_kernel:
6737                 case BPF_FUNC_probe_read_user:
6738                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6739                                 insn->imm = BPF_FUNC_probe_read;
6740                         break;
6741                 case BPF_FUNC_probe_read_kernel_str:
6742                 case BPF_FUNC_probe_read_user_str:
6743                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6744                                 insn->imm = BPF_FUNC_probe_read_str;
6745                         break;
6746                 default:
6747                         break;
6748                 }
6749         }
6750         return 0;
6751 }
6752
6753 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
6754                                      int *btf_obj_fd, int *btf_type_id);
6755
6756 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
6757 static int libbpf_prepare_prog_load(struct bpf_program *prog,
6758                                     struct bpf_prog_load_opts *opts, long cookie)
6759 {
6760         enum sec_def_flags def = cookie;
6761
6762         /* old kernels might not support specifying expected_attach_type */
6763         if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
6764                 opts->expected_attach_type = 0;
6765
6766         if (def & SEC_SLEEPABLE)
6767                 opts->prog_flags |= BPF_F_SLEEPABLE;
6768
6769         if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
6770                 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
6771
6772         if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
6773                 int btf_obj_fd = 0, btf_type_id = 0, err;
6774                 const char *attach_name;
6775
6776                 attach_name = strchr(prog->sec_name, '/');
6777                 if (!attach_name) {
6778                         /* if a BPF program is annotated with just SEC("fentry")
6779                          * (or similar) without declaratively specifying a
6780                          * target, then the target is expected to be set with
6781                          * bpf_program__set_attach_target() at runtime, before
6782                          * the BPF object load step. If it isn't, there is
6783                          * nothing to load into the kernel, as the BPF
6784                          * verifier won't be able to validate BPF program
6785                          * correctness anyway.
6786                          */
6787                         pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
6788                                 prog->name);
6789                         return -EINVAL;
6790                 }
6791                 attach_name++; /* skip over / */
6792
6793                 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
6794                 if (err)
6795                         return err;
6796
6797                 /* cache resolved BTF FD and BTF type ID in the prog */
6798                 prog->attach_btf_obj_fd = btf_obj_fd;
6799                 prog->attach_btf_id = btf_type_id;
6800
6801                 /* by this point libbpf's common logic no longer uses
6802                  * prog->attach_btf_obj_fd/prog->attach_btf_id, because this
6803                  * callback is called after opts were populated by libbpf,
6804                  * so this callback has to update opts explicitly here
6805                  */
6806                 opts->attach_btf_obj_fd = btf_obj_fd;
6807                 opts->attach_btf_id = btf_type_id;
6808         }
6809         return 0;
6810 }
6811
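/* A minimal usage sketch (object path and target function name are
 * hypothetical) of supplying the attach target at runtime, which the check
 * above requires when a program is annotated with bare SEC("fentry"):
 *
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	struct bpf_program *p;
 *
 *	p = bpf_object__find_program_by_name(obj, "handler");
 *	// must happen before bpf_object__load()
 *	bpf_program__set_attach_target(p, 0, "tcp_connect");
 *	bpf_object__load(obj);
 */
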
6812 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
6813
6814 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
6815                                 struct bpf_insn *insns, int insns_cnt,
6816                                 const char *license, __u32 kern_version, int *prog_fd)
6817 {
6818         LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
6819         const char *prog_name = NULL;
6820         char *cp, errmsg[STRERR_BUFSIZE];
6821         size_t log_buf_size = 0;
6822         char *log_buf = NULL, *tmp;
6823         int btf_fd, ret, err;
6824         bool own_log_buf = true;
6825         __u32 log_level = prog->log_level;
6826
6827         if (prog->type == BPF_PROG_TYPE_UNSPEC) {
6828                 /*
6829                  * The program type must be set.  Most likely we couldn't find a proper
6830                  * section definition at load time, and thus we didn't infer the type.
6831                  */
6832                 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
6833                         prog->name, prog->sec_name);
6834                 return -EINVAL;
6835         }
6836
6837         if (!insns || !insns_cnt)
6838                 return -EINVAL;
6839
6840         load_attr.expected_attach_type = prog->expected_attach_type;
6841         if (kernel_supports(obj, FEAT_PROG_NAME))
6842                 prog_name = prog->name;
6843         load_attr.attach_prog_fd = prog->attach_prog_fd;
6844         load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
6845         load_attr.attach_btf_id = prog->attach_btf_id;
6846         load_attr.kern_version = kern_version;
6847         load_attr.prog_ifindex = prog->prog_ifindex;
6848
6849         /* specify func_info/line_info only if kernel supports them */
6850         btf_fd = bpf_object__btf_fd(obj);
6851         if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
6852                 load_attr.prog_btf_fd = btf_fd;
6853                 load_attr.func_info = prog->func_info;
6854                 load_attr.func_info_rec_size = prog->func_info_rec_size;
6855                 load_attr.func_info_cnt = prog->func_info_cnt;
6856                 load_attr.line_info = prog->line_info;
6857                 load_attr.line_info_rec_size = prog->line_info_rec_size;
6858                 load_attr.line_info_cnt = prog->line_info_cnt;
6859         }
6860         load_attr.log_level = log_level;
6861         load_attr.prog_flags = prog->prog_flags;
6862         load_attr.fd_array = obj->fd_array;
6863
6864         /* adjust load_attr if sec_def provides custom preload callback */
6865         if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
6866                 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
6867                 if (err < 0) {
6868                         pr_warn("prog '%s': failed to prepare load attributes: %d\n",
6869                                 prog->name, err);
6870                         return err;
6871                 }
6872                 insns = prog->insns;
6873                 insns_cnt = prog->insns_cnt;
6874         }
6875
6876         if (obj->gen_loader) {
6877                 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
6878                                    license, insns, insns_cnt, &load_attr,
6879                                    prog - obj->programs);
6880                 *prog_fd = -1;
6881                 return 0;
6882         }
6883
6884 retry_load:
6885         /* if log_level is zero, we don't request logs initially even if
6886          * custom log_buf is specified; if the program load fails, then we'll
6887          * bump log_level to 1 and use either custom log_buf or we'll allocate
6888          * our own and retry the load to get details on what failed
6889          */
6890         if (log_level) {
6891                 if (prog->log_buf) {
6892                         log_buf = prog->log_buf;
6893                         log_buf_size = prog->log_size;
6894                         own_log_buf = false;
6895                 } else if (obj->log_buf) {
6896                         log_buf = obj->log_buf;
6897                         log_buf_size = obj->log_size;
6898                         own_log_buf = false;
6899                 } else {
6900                         log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
6901                         tmp = realloc(log_buf, log_buf_size);
6902                         if (!tmp) {
6903                                 ret = -ENOMEM;
6904                                 goto out;
6905                         }
6906                         log_buf = tmp;
6907                         log_buf[0] = '\0';
6908                         own_log_buf = true;
6909                 }
6910         }
6911
6912         load_attr.log_buf = log_buf;
6913         load_attr.log_size = log_buf_size;
6914         load_attr.log_level = log_level;
6915
6916         ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
6917         if (ret >= 0) {
6918                 if (log_level && own_log_buf) {
6919                         pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6920                                  prog->name, log_buf);
6921                 }
6922
6923                 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
6924                         struct bpf_map *map;
6925                         int i;
6926
6927                         for (i = 0; i < obj->nr_maps; i++) {
6928                                 map = &obj->maps[i];
6929                                 if (map->libbpf_type != LIBBPF_MAP_RODATA)
6930                                         continue;
6931
6932                                 if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) {
6933                                         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6934                                         pr_warn("prog '%s': failed to bind map '%s': %s\n",
6935                                                 prog->name, map->real_name, cp);
6936                                         /* Don't fail hard if can't bind rodata. */
6937                                 }
6938                         }
6939                 }
6940
6941                 *prog_fd = ret;
6942                 ret = 0;
6943                 goto out;
6944         }
6945
6946         if (log_level == 0) {
6947                 log_level = 1;
6948                 goto retry_load;
6949         }
6950         /* On ENOSPC, increase log buffer size and retry, unless custom
6951          * log_buf is specified.
6952          * Be careful to not overflow u32, though. Kernel's log buf size limit
6953          * isn't part of UAPI so it can always be bumped to full 4GB. So don't
6954          * multiply by 2 unless we are sure we'll fit within 32 bits.
6955          * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
6956          */
6957         if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
6958                 goto retry_load;
6959
6960         ret = -errno;
6961
6962         /* post-process verifier log to improve error descriptions */
6963         fixup_verifier_log(prog, log_buf, log_buf_size);
6964
6965         cp = libbpf_strerror_r(-ret, errmsg, sizeof(errmsg));
6966         pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
6967         pr_perm_msg(ret);
6968
6969         if (own_log_buf && log_buf && log_buf[0] != '\0') {
6970                 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6971                         prog->name, log_buf);
6972         }
6973
6974 out:
6975         if (own_log_buf)
6976                 free(log_buf);
6977         return ret;
6978 }
6979
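/* A usage sketch (buffer size illustrative) of the per-program log knobs
 * consulted in the retry logic above; a caller-provided buffer suppresses
 * libbpf's own allocation (own_log_buf stays false):
 *
 *	static char vlog[1024 * 1024];
 *
 *	bpf_program__set_log_buf(prog, vlog, sizeof(vlog));
 *	bpf_program__set_log_level(prog, 1);  // request log on first attempt
 *	// after a failed bpf_object__load(), vlog holds the verifier log
 */
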
6980 static char *find_prev_line(char *buf, char *cur)
6981 {
6982         char *p;
6983
6984         if (cur == buf) /* start of the log buf, no previous line */
6985                 return NULL;
6986
6987         p = cur - 1;
6988         while (p - 1 >= buf && *(p - 1) != '\n')
6989                 p--;
6990
6991         return p;
6992 }
6993
6994 static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
6995                       char *orig, size_t orig_sz, const char *patch)
6996 {
6997         /* size of the remaining log content to the right from the to-be-replaced part */
6998         size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
6999         size_t patch_sz = strlen(patch);
7000
7001         if (patch_sz != orig_sz) {
7002                 /* If patch line(s) are longer than original piece of verifier log,
7003                  * shift log contents by (patch_sz - orig_sz) bytes to the right
7004                  * starting from after to-be-replaced part of the log.
7005                  *
7006                  * If patch line(s) are shorter than original piece of verifier log,
7007                  * shift log contents by (orig_sz - patch_sz) bytes to the left
7008                  * starting from after to-be-replaced part of the log
7009                  *
7010                  * We need to be careful about not overflowing available
7011                  * buf_sz capacity. If that's the case, we'll truncate the end
7012                  * of the original log, as necessary.
7013                  */
7014                 if (patch_sz > orig_sz) {
7015                         if (orig + patch_sz >= buf + buf_sz) {
7016                                 /* patch is big enough to cover remaining space completely */
7017                                 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
7018                                 rem_sz = 0;
7019                         } else if (patch_sz - orig_sz > buf_sz - log_sz) {
7020                                 /* patch causes part of remaining log to be truncated */
7021                                 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7022                         }
7023                 }
7024                 /* shift remaining log to the right by calculated amount */
7025                 memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7026         }
7027
7028         memcpy(orig, patch, patch_sz);
7029 }
7030
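/* Worked example (illustrative strings): with buf = "foo\nBAD\nbar\n", orig
 * pointing at "BAD\n" (orig_sz = 4) and patch = "LONGER\n" (patch_sz = 7),
 * the remainder "bar\n" is shifted right by 3 bytes (truncated if buf_sz
 * doesn't allow it) and the patch is copied in, yielding
 * "foo\nLONGER\nbar\n".
 */
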
7031 static void fixup_log_failed_core_relo(struct bpf_program *prog,
7032                                        char *buf, size_t buf_sz, size_t log_sz,
7033                                        char *line1, char *line2, char *line3)
7034 {
7035         /* Expected log for failed and not properly guarded CO-RE relocation:
7036          * line1 -> 123: (85) call unknown#195896080
7037          * line2 -> invalid func unknown#195896080
7038          * line3 -> <anything else or end of buffer>
7039          *
7040          * "123" is the index of the instruction that was poisoned. We extract
7041          * instruction index to find corresponding CO-RE relocation and
7042          * replace this part of the log with more relevant information about
7043          * failed CO-RE relocation.
7044          */
7045         const struct bpf_core_relo *relo;
7046         struct bpf_core_spec spec;
7047         char patch[512], spec_buf[256];
7048         int insn_idx, err, spec_len;
7049
7050         if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
7051                 return;
7052
7053         relo = find_relo_core(prog, insn_idx);
7054         if (!relo)
7055                 return;
7056
7057         err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
7058         if (err)
7059                 return;
7060
7061         spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
7062         snprintf(patch, sizeof(patch),
7063                  "%d: <invalid CO-RE relocation>\n"
7064                  "failed to resolve CO-RE relocation %s%s\n",
7065                  insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
7066
7067         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7068 }
7069
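/* BPF-side sketch (field name hypothetical) of a "properly guarded" CO-RE
 * access that avoids the poisoned-instruction path diagnosed above:
 *
 *	if (bpf_core_field_exists(task->some_new_field))
 *		val = BPF_CORE_READ(task, some_new_field);
 *
 * An unguarded access to a missing field is poisoned into
 * "call unknown#195896080", rejected by the verifier, and then translated
 * back into a CO-RE error message by this log rewrite.
 */
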
7070 static void fixup_log_missing_map_load(struct bpf_program *prog,
7071                                        char *buf, size_t buf_sz, size_t log_sz,
7072                                        char *line1, char *line2, char *line3)
7073 {
7074         /* Expected log for failed and not properly guarded map reference:
7075          * line1 -> 123: (85) call unknown#2001000345
7076          * line2 -> invalid func unknown#2001000345
7077          * line3 -> <anything else or end of buffer>
7078          *
7079          * "123" is the index of the instruction that was poisoned.
7080          * "345" in "2001000345" is a map index in obj->maps to fetch map name.
7081          */
7082         struct bpf_object *obj = prog->obj;
7083         const struct bpf_map *map;
7084         int insn_idx, map_idx;
7085         char patch[128];
7086
7087         if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
7088                 return;
7089
7090         map_idx -= POISON_LDIMM64_MAP_BASE;
7091         if (map_idx < 0 || map_idx >= obj->nr_maps)
7092                 return;
7093         map = &obj->maps[map_idx];
7094
7095         snprintf(patch, sizeof(patch),
7096                  "%d: <invalid BPF map reference>\n"
7097                  "BPF map '%s' is referenced but wasn't created\n",
7098                  insn_idx, map->name);
7099
7100         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7101 }
7102
7103 static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
7104                                          char *buf, size_t buf_sz, size_t log_sz,
7105                                          char *line1, char *line2, char *line3)
7106 {
7107         /* Expected log for failed and not properly guarded kfunc call:
7108          * line1 -> 123: (85) call unknown#2002000345
7109          * line2 -> invalid func unknown#2002000345
7110          * line3 -> <anything else or end of buffer>
7111          *
7112          * "123" is the index of the instruction that was poisoned.
7113          * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
7114          */
7115         struct bpf_object *obj = prog->obj;
7116         const struct extern_desc *ext;
7117         int insn_idx, ext_idx;
7118         char patch[128];
7119
7120         if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
7121                 return;
7122
7123         ext_idx -= POISON_CALL_KFUNC_BASE;
7124         if (ext_idx < 0 || ext_idx >= obj->nr_extern)
7125                 return;
7126         ext = &obj->externs[ext_idx];
7127
7128         snprintf(patch, sizeof(patch),
7129                  "%d: <invalid kfunc call>\n"
7130                  "kfunc '%s' is referenced but wasn't resolved\n",
7131                  insn_idx, ext->name);
7132
7133         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7134 }
7135
7136 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
7137 {
7138         /* look for familiar error patterns in last N lines of the log */
7139         const size_t max_last_line_cnt = 10;
7140         char *prev_line, *cur_line, *next_line;
7141         size_t log_sz;
7142         int i;
7143
7144         if (!buf)
7145                 return;
7146
7147         log_sz = strlen(buf) + 1;
7148         next_line = buf + log_sz - 1;
7149
7150         for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
7151                 cur_line = find_prev_line(buf, next_line);
7152                 if (!cur_line)
7153                         return;
7154
7155                 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
7156                         prev_line = find_prev_line(buf, cur_line);
7157                         if (!prev_line)
7158                                 continue;
7159
7160                         /* failed CO-RE relocation case */
7161                         fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
7162                                                    prev_line, cur_line, next_line);
7163                         return;
7164                 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
7165                         prev_line = find_prev_line(buf, cur_line);
7166                         if (!prev_line)
7167                                 continue;
7168
7169                         /* reference to uncreated BPF map */
7170                         fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
7171                                                    prev_line, cur_line, next_line);
7172                         return;
7173                 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
7174                         prev_line = find_prev_line(buf, cur_line);
7175                         if (!prev_line)
7176                                 continue;
7177
7178                         /* reference to unresolved kfunc */
7179                         fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
7180                                                      prev_line, cur_line, next_line);
7181                         return;
7182                 }
7183         }
7184 }
7185
7186 static int bpf_program_record_relos(struct bpf_program *prog)
7187 {
7188         struct bpf_object *obj = prog->obj;
7189         int i;
7190
7191         for (i = 0; i < prog->nr_reloc; i++) {
7192                 struct reloc_desc *relo = &prog->reloc_desc[i];
7193                 struct extern_desc *ext = &obj->externs[relo->ext_idx];
7194                 int kind;
7195
7196                 switch (relo->type) {
7197                 case RELO_EXTERN_LD64:
7198                         if (ext->type != EXT_KSYM)
7199                                 continue;
7200                         kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
7201                                 BTF_KIND_VAR : BTF_KIND_FUNC;
7202                         bpf_gen__record_extern(obj->gen_loader, ext->name,
7203                                                ext->is_weak, !ext->ksym.type_id,
7204                                                true, kind, relo->insn_idx);
7205                         break;
7206                 case RELO_EXTERN_CALL:
7207                         bpf_gen__record_extern(obj->gen_loader, ext->name,
7208                                                ext->is_weak, false, false, BTF_KIND_FUNC,
7209                                                relo->insn_idx);
7210                         break;
7211                 case RELO_CORE: {
7212                         struct bpf_core_relo cr = {
7213                                 .insn_off = relo->insn_idx * 8,
7214                                 .type_id = relo->core_relo->type_id,
7215                                 .access_str_off = relo->core_relo->access_str_off,
7216                                 .kind = relo->core_relo->kind,
7217                         };
7218
7219                         bpf_gen__record_relo_core(obj->gen_loader, &cr);
7220                         break;
7221                 }
7222                 default:
7223                         continue;
7224                 }
7225         }
7226         return 0;
7227 }
7228
7229 static int
7230 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7231 {
7232         struct bpf_program *prog;
7233         size_t i;
7234         int err;
7235
7236         for (i = 0; i < obj->nr_programs; i++) {
7237                 prog = &obj->programs[i];
7238                 err = bpf_object__sanitize_prog(obj, prog);
7239                 if (err)
7240                         return err;
7241         }
7242
7243         for (i = 0; i < obj->nr_programs; i++) {
7244                 prog = &obj->programs[i];
7245                 if (prog_is_subprog(obj, prog))
7246                         continue;
7247                 if (!prog->autoload) {
7248                         pr_debug("prog '%s': skipped loading\n", prog->name);
7249                         continue;
7250                 }
7251                 prog->log_level |= log_level;
7252
7253                 if (obj->gen_loader)
7254                         bpf_program_record_relos(prog);
7255
7256                 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7257                                            obj->license, obj->kern_version, &prog->fd);
7258                 if (err) {
7259                         pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7260                         return err;
7261                 }
7262         }
7263
7264         bpf_object__free_relocs(obj);
7265         return 0;
7266 }
7267
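/* A usage sketch of the autoload knob honored above; disabling one program
 * while loading the rest:
 *
 *	bpf_program__set_autoload(prog, false);
 *	// bpf_object__load() then skips it with "skipped loading"
 */
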
7268 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7269
7270 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7271 {
7272         struct bpf_program *prog;
7273         int err;
7274
7275         bpf_object__for_each_program(prog, obj) {
7276                 prog->sec_def = find_sec_def(prog->sec_name);
7277                 if (!prog->sec_def) {
7278                         /* couldn't guess, but user might manually specify */
7279                         pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7280                                 prog->name, prog->sec_name);
7281                         continue;
7282                 }
7283
7284                 prog->type = prog->sec_def->prog_type;
7285                 prog->expected_attach_type = prog->sec_def->expected_attach_type;
7286
7287                 /* sec_def can have custom callback which should be called
7288                  * after bpf_program is initialized to adjust its properties
7289                  */
7290                 if (prog->sec_def->prog_setup_fn) {
7291                         err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7292                         if (err < 0) {
7293                                 pr_warn("prog '%s': failed to initialize: %d\n",
7294                                         prog->name, err);
7295                                 return err;
7296                         }
7297                 }
7298         }
7299
7300         return 0;
7301 }
7302
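/* When find_sec_def() fails above, the program is left without a type and
 * the caller may set one manually before load; a sketch (type choice purely
 * illustrative):
 *
 *	bpf_program__set_type(prog, BPF_PROG_TYPE_KPROBE);
 *	bpf_program__set_expected_attach_type(prog, 0);
 */
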
7303 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7304                                           const struct bpf_object_open_opts *opts)
7305 {
7306         const char *obj_name, *kconfig, *btf_tmp_path;
7307         struct bpf_object *obj;
7308         char tmp_name[64];
7309         int err;
7310         char *log_buf;
7311         size_t log_size;
7312         __u32 log_level;
7313
7314         if (elf_version(EV_CURRENT) == EV_NONE) {
7315                 pr_warn("failed to init libelf for %s\n",
7316                         path ? : "(mem buf)");
7317                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7318         }
7319
7320         if (!OPTS_VALID(opts, bpf_object_open_opts))
7321                 return ERR_PTR(-EINVAL);
7322
7323         obj_name = OPTS_GET(opts, object_name, NULL);
7324         if (obj_buf) {
7325                 if (!obj_name) {
7326                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7327                                  (unsigned long)obj_buf,
7328                                  (unsigned long)obj_buf_sz);
7329                         obj_name = tmp_name;
7330                 }
7331                 path = obj_name;
7332                 pr_debug("loading object '%s' from buffer\n", obj_name);
7333         }
7334
7335         log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7336         log_size = OPTS_GET(opts, kernel_log_size, 0);
7337         log_level = OPTS_GET(opts, kernel_log_level, 0);
7338         if (log_size > UINT_MAX)
7339                 return ERR_PTR(-EINVAL);
7340         if (log_size && !log_buf)
7341                 return ERR_PTR(-EINVAL);
7342
7343         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7344         if (IS_ERR(obj))
7345                 return obj;
7346
7347         obj->log_buf = log_buf;
7348         obj->log_size = log_size;
7349         obj->log_level = log_level;
7350
7351         btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7352         if (btf_tmp_path) {
7353                 if (strlen(btf_tmp_path) >= PATH_MAX) {
7354                         err = -ENAMETOOLONG;
7355                         goto out;
7356                 }
7357                 obj->btf_custom_path = strdup(btf_tmp_path);
7358                 if (!obj->btf_custom_path) {
7359                         err = -ENOMEM;
7360                         goto out;
7361                 }
7362         }
7363
7364         kconfig = OPTS_GET(opts, kconfig, NULL);
7365         if (kconfig) {
7366                 obj->kconfig = strdup(kconfig);
7367                 if (!obj->kconfig) {
7368                         err = -ENOMEM;
7369                         goto out;
7370                 }
7371         }
7372
7373         err = bpf_object__elf_init(obj);
7374         err = err ? : bpf_object__check_endianness(obj);
7375         err = err ? : bpf_object__elf_collect(obj);
7376         err = err ? : bpf_object__collect_externs(obj);
7377         err = err ? : bpf_object_fixup_btf(obj);
7378         err = err ? : bpf_object__init_maps(obj, opts);
7379         err = err ? : bpf_object_init_progs(obj, opts);
7380         err = err ? : bpf_object__collect_relos(obj);
7381         if (err)
7382                 goto out;
7383
7384         bpf_object__elf_finish(obj);
7385
7386         return obj;
7387 out:
7388         bpf_object__close(obj);
7389         return ERR_PTR(err);
7390 }
7391
7392 struct bpf_object *
7393 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7394 {
7395         if (!path)
7396                 return libbpf_err_ptr(-EINVAL);
7397
7398         pr_debug("loading %s\n", path);
7399
7400         return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
7401 }
7402
7403 struct bpf_object *bpf_object__open(const char *path)
7404 {
7405         return bpf_object__open_file(path, NULL);
7406 }
7407
7408 struct bpf_object *
7409 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7410                      const struct bpf_object_open_opts *opts)
7411 {
7412         if (!obj_buf || obj_buf_sz == 0)
7413                 return libbpf_err_ptr(-EINVAL);
7414
7415         return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
7416 }
7417
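/* A usage sketch (path and buffer size illustrative) combining the open
 * options validated in bpf_object_open() above:
 *
 *	static char kernel_log[64 * 1024];
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.kernel_log_buf = kernel_log,
 *		.kernel_log_size = sizeof(kernel_log),
 *		.kernel_log_level = 1,
 *	);
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 *
 *	if (!obj)	// libbpf_ptr() returns NULL and sets errno on error
 *		return -errno;
 */
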
7418 static int bpf_object_unload(struct bpf_object *obj)
7419 {
7420         size_t i;
7421
7422         if (!obj)
7423                 return libbpf_err(-EINVAL);
7424
7425         for (i = 0; i < obj->nr_maps; i++) {
7426                 zclose(obj->maps[i].fd);
7427                 if (obj->maps[i].st_ops)
7428                         zfree(&obj->maps[i].st_ops->kern_vdata);
7429         }
7430
7431         for (i = 0; i < obj->nr_programs; i++)
7432                 bpf_program__unload(&obj->programs[i]);
7433
7434         return 0;
7435 }
7436
7437 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7438 {
7439         struct bpf_map *m;
7440
7441         bpf_object__for_each_map(m, obj) {
7442                 if (!bpf_map__is_internal(m))
7443                         continue;
7444                 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7445                         m->def.map_flags &= ~BPF_F_MMAPABLE;
7446         }
7447
7448         return 0;
7449 }
7450
7451 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
7452 {
7453         char sym_type, sym_name[500];
7454         unsigned long long sym_addr;
7455         int ret, err = 0;
7456         FILE *f;
7457
7458         f = fopen("/proc/kallsyms", "re");
7459         if (!f) {
7460                 err = -errno;
7461                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7462                 return err;
7463         }
7464
7465         while (true) {
7466                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7467                              &sym_addr, &sym_type, sym_name);
7468                 if (ret == EOF && feof(f))
7469                         break;
7470                 if (ret != 3) {
7471                         pr_warn("failed to read kallsyms entry: %d\n", ret);
7472                         err = -EINVAL;
7473                         break;
7474                 }
7475
7476                 err = cb(sym_addr, sym_type, sym_name, ctx);
7477                 if (err)
7478                         break;
7479         }
7480
7481         fclose(f);
7482         return err;
7483 }
7484
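/* A minimal usage sketch of the public iterator above (callback name is
 * illustrative):
 *
 *	static int print_sym(unsigned long long addr, char type,
 *			     const char *name, void *ctx)
 *	{
 *		printf("%llx %c %s\n", addr, type, name);
 *		return 0;	// non-zero stops the iteration
 *	}
 *
 *	...
 *	err = libbpf_kallsyms_parse(print_sym, NULL);
 */
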
7485 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
7486                        const char *sym_name, void *ctx)
7487 {
7488         struct bpf_object *obj = ctx;
7489         const struct btf_type *t;
7490         struct extern_desc *ext;
7491
7492         ext = find_extern_by_name(obj, sym_name);
7493         if (!ext || ext->type != EXT_KSYM)
7494                 return 0;
7495
7496         t = btf__type_by_id(obj->btf, ext->btf_id);
7497         if (!btf_is_var(t))
7498                 return 0;
7499
7500         if (ext->is_set && ext->ksym.addr != sym_addr) {
7501                 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
7502                         sym_name, ext->ksym.addr, sym_addr);
7503                 return -EINVAL;
7504         }
7505         if (!ext->is_set) {
7506                 ext->is_set = true;
7507                 ext->ksym.addr = sym_addr;
7508                 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
7509         }
7510         return 0;
7511 }
7512
7513 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7514 {
7515         return libbpf_kallsyms_parse(kallsyms_cb, obj);
7516 }
7517
7518 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7519                             __u16 kind, struct btf **res_btf,
7520                             struct module_btf **res_mod_btf)
7521 {
7522         struct module_btf *mod_btf;
7523         struct btf *btf;
7524         int i, id, err;
7525
7526         btf = obj->btf_vmlinux;
7527         mod_btf = NULL;
7528         id = btf__find_by_name_kind(btf, ksym_name, kind);
7529
7530         if (id == -ENOENT) {
7531                 err = load_module_btfs(obj);
7532                 if (err)
7533                         return err;
7534
7535                 for (i = 0; i < obj->btf_module_cnt; i++) {
7536                         /* we assume module_btf's BTF FD is always >0 */
7537                         mod_btf = &obj->btf_modules[i];
7538                         btf = mod_btf->btf;
7539                         id = btf__find_by_name_kind_own(btf, ksym_name, kind);
7540                         if (id != -ENOENT)
7541                                 break;
7542                 }
7543         }
7544         if (id <= 0)
7545                 return -ESRCH;
7546
7547         *res_btf = btf;
7548         *res_mod_btf = mod_btf;
7549         return id;
7550 }
7551
7552 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7553                                                struct extern_desc *ext)
7554 {
7555         const struct btf_type *targ_var, *targ_type;
7556         __u32 targ_type_id, local_type_id;
7557         struct module_btf *mod_btf = NULL;
7558         const char *targ_var_name;
7559         struct btf *btf = NULL;
7560         int id, err;
7561
7562         id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
7563         if (id < 0) {
7564                 if (id == -ESRCH && ext->is_weak)
7565                         return 0;
7566                 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
7567                         ext->name);
7568                 return id;
7569         }
7570
7571         /* find local type_id */
7572         local_type_id = ext->ksym.type_id;
7573
7574         /* find target type_id */
7575         targ_var = btf__type_by_id(btf, id);
7576         targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7577         targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7578
7579         err = bpf_core_types_are_compat(obj->btf, local_type_id,
7580                                         btf, targ_type_id);
7581         if (err <= 0) {
7582                 const struct btf_type *local_type;
7583                 const char *targ_name, *local_name;
7584
7585                 local_type = btf__type_by_id(obj->btf, local_type_id);
7586                 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7587                 targ_name = btf__name_by_offset(btf, targ_type->name_off);
7588
7589                 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7590                         ext->name, local_type_id,
7591                         btf_kind_str(local_type), local_name, targ_type_id,
7592                         btf_kind_str(targ_type), targ_name);
7593                 return -EINVAL;
7594         }
7595
7596         ext->is_set = true;
7597         ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7598         ext->ksym.kernel_btf_id = id;
7599         pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7600                  ext->name, id, btf_kind_str(targ_var), targ_var_name);
7601
7602         return 0;
7603 }
7604
7605 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7606                                                 struct extern_desc *ext)
7607 {
7608         int local_func_proto_id, kfunc_proto_id, kfunc_id;
7609         struct module_btf *mod_btf = NULL;
7610         const struct btf_type *kern_func;
7611         struct btf *kern_btf = NULL;
7612         int ret;
7613
7614         local_func_proto_id = ext->ksym.type_id;
7615
7616         kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf);
7617         if (kfunc_id < 0) {
7618                 if (kfunc_id == -ESRCH && ext->is_weak)
7619                         return 0;
7620                 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
7621                         ext->name);
7622                 return kfunc_id;
7623         }
7624
7625         kern_func = btf__type_by_id(kern_btf, kfunc_id);
7626         kfunc_proto_id = kern_func->type;
7627
7628         ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
7629                                         kern_btf, kfunc_proto_id);
7630         if (ret <= 0) {
7631                 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
7632                         ext->name, local_func_proto_id,
7633                         mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
7634                 return -EINVAL;
7635         }
7636
7637         /* set index for module BTF fd in fd_array, if unset */
7638         if (mod_btf && !mod_btf->fd_array_idx) {
7639                 /* insn->off is s16 */
7640                 if (obj->fd_array_cnt == INT16_MAX) {
7641                         pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
7642                                 ext->name, mod_btf->fd_array_idx);
7643                         return -E2BIG;
7644                 }
7645                 /* Cannot use index 0 for module BTF fd */
7646                 if (!obj->fd_array_cnt)
7647                         obj->fd_array_cnt = 1;
7648
7649                 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
7650                                         obj->fd_array_cnt + 1);
7651                 if (ret)
7652                         return ret;
7653                 mod_btf->fd_array_idx = obj->fd_array_cnt;
7654                 /* we assume module BTF FD is always >0 */
7655                 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
7656         }
7657
7658         ext->is_set = true;
7659         ext->ksym.kernel_btf_id = kfunc_id;
7660         ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
7661         /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
7662          * populates FD into ld_imm64 insn when it's used to point to kfunc.
7663          * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
7664          * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
7665          */
7666         ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7667         pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
7668                  ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
7669
7670         return 0;
7671 }
7672
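/* BPF-side sketch of kfunc externs resolved above (these kfunc names exist
 * in recent kernels, but are shown purely for illustration):
 *
 *	extern struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
 *	extern void bpf_task_release(struct task_struct *p) __ksym __weak;
 *
 * A missing __weak kfunc is tolerated (the -ESRCH early return above); a
 * missing strong one fails the load.
 */
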
7673 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7674 {
7675         const struct btf_type *t;
7676         struct extern_desc *ext;
7677         int i, err;
7678
7679         for (i = 0; i < obj->nr_extern; i++) {
7680                 ext = &obj->externs[i];
7681                 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7682                         continue;
7683
7684                 if (obj->gen_loader) {
7685                         ext->is_set = true;
7686                         ext->ksym.kernel_btf_obj_fd = 0;
7687                         ext->ksym.kernel_btf_id = 0;
7688                         continue;
7689                 }
7690                 t = btf__type_by_id(obj->btf, ext->btf_id);
7691                 if (btf_is_var(t))
7692                         err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
7693                 else
7694                         err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
7695                 if (err)
7696                         return err;
7697         }
7698         return 0;
7699 }
7700
7701 static int bpf_object__resolve_externs(struct bpf_object *obj,
7702                                        const char *extra_kconfig)
7703 {
7704         bool need_config = false, need_kallsyms = false;
7705         bool need_vmlinux_btf = false;
7706         struct extern_desc *ext;
7707         void *kcfg_data = NULL;
7708         int err, i;
7709
7710         if (obj->nr_extern == 0)
7711                 return 0;
7712
7713         if (obj->kconfig_map_idx >= 0)
7714                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
7715
7716         for (i = 0; i < obj->nr_extern; i++) {
7717                 ext = &obj->externs[i];
7718
7719                 if (ext->type == EXT_KSYM) {
7720                         if (ext->ksym.type_id)
7721                                 need_vmlinux_btf = true;
7722                         else
7723                                 need_kallsyms = true;
7724                         continue;
7725                 } else if (ext->type == EXT_KCFG) {
7726                         void *ext_ptr = kcfg_data + ext->kcfg.data_off;
7727                         __u64 value = 0;
7728
7729                         /* Kconfig externs need actual /proc/config.gz */
7730                         if (str_has_pfx(ext->name, "CONFIG_")) {
7731                                 need_config = true;
7732                                 continue;
7733                         }
7734
7735                         /* Virtual kcfg externs are handled specially by libbpf */
7736                         if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
7737                                 value = get_kernel_version();
7738                                 if (!value) {
7739                                         pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
7740                                         return -EINVAL;
7741                                 }
7742                         } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
7743                                 value = kernel_supports(obj, FEAT_BPF_COOKIE);
7744                         } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
7745                                 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
7746                         } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
7747                                 /* Currently libbpf supports only CONFIG_- and LINUX_-prefixed
7748                                  * __kconfig externs, where LINUX_ ones are virtual and filled out
7749                                  * by libbpf itself (their values don't come from Kconfig).
7750                                  * If a LINUX_xxx variable is not recognized by libbpf, but is
7751                                  * marked __weak, it defaults to a zero value, just like CONFIG_xxx
7752                                  * externs do.
7753                                  */
7754                                 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
7755                                 return -EINVAL;
7756                         }
7757
7758                         err = set_kcfg_value_num(ext, ext_ptr, value);
7759                         if (err)
7760                                 return err;
7761                         pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
7762                                  ext->name, (long long)value);
7763                 } else {
7764                         pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
7765                         return -EINVAL;
7766                 }
7767         }
7768         if (need_config && extra_kconfig) {
7769                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
7770                 if (err)
7771                         return -EINVAL;
7772                 need_config = false;
7773                 for (i = 0; i < obj->nr_extern; i++) {
7774                         ext = &obj->externs[i];
7775                         if (ext->type == EXT_KCFG && !ext->is_set) {
7776                                 need_config = true;
7777                                 break;
7778                         }
7779                 }
7780         }
7781         if (need_config) {
7782                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
7783                 if (err)
7784                         return -EINVAL;
7785         }
7786         if (need_kallsyms) {
7787                 err = bpf_object__read_kallsyms_file(obj);
7788                 if (err)
7789                         return -EINVAL;
7790         }
7791         if (need_vmlinux_btf) {
7792                 err = bpf_object__resolve_ksyms_btf_id(obj);
7793                 if (err)
7794                         return -EINVAL;
7795         }
7796         for (i = 0; i < obj->nr_extern; i++) {
7797                 ext = &obj->externs[i];
7798
7799                 if (!ext->is_set && !ext->is_weak) {
7800                         pr_warn("extern '%s' (strong): not resolved\n", ext->name);
7801                         return -ESRCH;
7802                 } else if (!ext->is_set) {
7803                         pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
7804                                  ext->name);
7805                 }
7806         }
7807
7808         return 0;
7809 }
7810
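/* BPF-side sketch of the extern kinds resolved above (declarations as in
 * real BPF programs; the ksym name is a placeholder):
 *
 *	extern unsigned int LINUX_KERNEL_VERSION __kconfig;	// virtual, filled by libbpf
 *	extern unsigned long CONFIG_HZ __kconfig __weak;	// from Kconfig
 *	extern const void some_kernel_symbol __ksym __weak;	// addr from kallsyms
 *
 * Strong externs that remain unresolved fail the load with -ESRCH; weak ones
 * default to zero, as the final loop above enforces.
 */
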
7811 static void bpf_map_prepare_vdata(const struct bpf_map *map)
7812 {
7813         struct bpf_struct_ops *st_ops;
7814         __u32 i;
7815
7816         st_ops = map->st_ops;
7817         for (i = 0; i < btf_vlen(st_ops->type); i++) {
7818                 struct bpf_program *prog = st_ops->progs[i];
7819                 void *kern_data;
7820                 int prog_fd;
7821
7822                 if (!prog)
7823                         continue;
7824
7825                 prog_fd = bpf_program__fd(prog);
7826                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
7827                 *(unsigned long *)kern_data = prog_fd;
7828         }
7829 }
7830
7831 static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
7832 {
7833         int i;
7834
7835         for (i = 0; i < obj->nr_maps; i++)
7836                 if (bpf_map__is_struct_ops(&obj->maps[i]))
7837                         bpf_map_prepare_vdata(&obj->maps[i]);
7838
7839         return 0;
7840 }
7841
7842 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
7843 {
7844         int err, i;
7845
7846         if (!obj)
7847                 return libbpf_err(-EINVAL);
7848
7849         if (obj->loaded) {
7850                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
7851                 return libbpf_err(-EINVAL);
7852         }
7853
7854         if (obj->gen_loader)
7855                 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
7856
7857         err = bpf_object__probe_loading(obj);
7858         err = err ? : bpf_object__load_vmlinux_btf(obj, false);
7859         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
7860         err = err ? : bpf_object__sanitize_and_load_btf(obj);
7861         err = err ? : bpf_object__sanitize_maps(obj);
7862         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
7863         err = err ? : bpf_object__create_maps(obj);
7864         err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
7865         err = err ? : bpf_object__load_progs(obj, extra_log_level);
7866         err = err ? : bpf_object_init_prog_arrays(obj);
7867         err = err ? : bpf_object_prepare_struct_ops(obj);
7868
7869         if (obj->gen_loader) {
7870                 /* reset FDs */
7871                 if (obj->btf)
7872                         btf__set_fd(obj->btf, -1);
7873                 for (i = 0; i < obj->nr_maps; i++)
7874                         obj->maps[i].fd = -1;
7875                 if (!err)
7876                         err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
7877         }
7878
7879         /* clean up fd_array */
7880         zfree(&obj->fd_array);
7881
7882         /* clean up module BTFs */
7883         for (i = 0; i < obj->btf_module_cnt; i++) {
7884                 close(obj->btf_modules[i].fd);
7885                 btf__free(obj->btf_modules[i].btf);
7886                 free(obj->btf_modules[i].name);
7887         }
7888         free(obj->btf_modules);
7889
7890         /* clean up vmlinux BTF */
7891         btf__free(obj->btf_vmlinux);
7892         obj->btf_vmlinux = NULL;
7893
7894         obj->loaded = true; /* whether the load succeeded or not */
7895
7896         if (err)
7897                 goto out;
7898
7899         return 0;
7900 out:
7901         /* unpin any maps that were auto-pinned during load */
7902         for (i = 0; i < obj->nr_maps; i++)
7903                 if (obj->maps[i].pinned && !obj->maps[i].reused)
7904                         bpf_map__unpin(&obj->maps[i], NULL);
7905
7906         bpf_object_unload(obj);
7907         pr_warn("failed to load object '%s'\n", obj->path);
7908         return libbpf_err(err);
7909 }
7910
7911 int bpf_object__load(struct bpf_object *obj)
7912 {
7913         return bpf_object_load(obj, 0, NULL);
7914 }
7915
7916 static int make_parent_dir(const char *path)
7917 {
7918         char *cp, errmsg[STRERR_BUFSIZE];
7919         char *dname, *dir;
7920         int err = 0;
7921
7922         dname = strdup(path);
7923         if (dname == NULL)
7924                 return -ENOMEM;
7925
7926         dir = dirname(dname);
7927         if (mkdir(dir, 0700) && errno != EEXIST)
7928                 err = -errno;
7929
7930         free(dname);
7931         if (err) {
7932                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7933                 pr_warn("failed to mkdir %s: %s\n", path, cp);
7934         }
7935         return err;
7936 }
7937
7938 static int check_path(const char *path)
7939 {
7940         char *cp, errmsg[STRERR_BUFSIZE];
7941         struct statfs st_fs;
7942         char *dname, *dir;
7943         int err = 0;
7944
7945         if (path == NULL)
7946                 return -EINVAL;
7947
7948         dname = strdup(path);
7949         if (dname == NULL)
7950                 return -ENOMEM;
7951
7952         dir = dirname(dname);
7953         if (statfs(dir, &st_fs)) {
7954                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7955                 pr_warn("failed to statfs %s: %s\n", dir, cp);
7956                 err = -errno;
7957         }
7958         free(dname);
7959
7960         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
7961                 pr_warn("specified path %s is not on BPF FS\n", path);
7962                 err = -EINVAL;
7963         }
7964
7965         return err;
7966 }
7967
7968 int bpf_program__pin(struct bpf_program *prog, const char *path)
7969 {
7970         char *cp, errmsg[STRERR_BUFSIZE];
7971         int err;
7972
7973         if (prog->fd < 0) {
7974                 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
7975                 return libbpf_err(-EINVAL);
7976         }
7977
7978         err = make_parent_dir(path);
7979         if (err)
7980                 return libbpf_err(err);
7981
7982         err = check_path(path);
7983         if (err)
7984                 return libbpf_err(err);
7985
7986         if (bpf_obj_pin(prog->fd, path)) {
7987                 err = -errno;
7988                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
7989                 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
7990                 return libbpf_err(err);
7991         }
7992
7993         pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
7994         return 0;
7995 }
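
/* Usage sketch (illustrative only): pin a single program after a
 * successful load. The program name and bpffs path are hypothetical.
 *
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *	if (prog && bpf_program__pin(prog, "/sys/fs/bpf/handle_exec"))
 *		fprintf(stderr, "pin failed: %s\n", strerror(errno));
 */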
7996
7997 int bpf_program__unpin(struct bpf_program *prog, const char *path)
7998 {
7999         int err;
8000
8001         if (prog->fd < 0) {
8002                 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
8003                 return libbpf_err(-EINVAL);
8004         }
8005
8006         err = check_path(path);
8007         if (err)
8008                 return libbpf_err(err);
8009
8010         err = unlink(path);
8011         if (err)
8012                 return libbpf_err(-errno);
8013
8014         pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
8015         return 0;
8016 }
8017
8018 int bpf_map__pin(struct bpf_map *map, const char *path)
8019 {
8020         char *cp, errmsg[STRERR_BUFSIZE];
8021         int err;
8022
8023         if (map == NULL) {
8024                 pr_warn("invalid map pointer\n");
8025                 return libbpf_err(-EINVAL);
8026         }
8027
8028         if (map->pin_path) {
8029                 if (path && strcmp(path, map->pin_path)) {
8030                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8031                                 bpf_map__name(map), map->pin_path, path);
8032                         return libbpf_err(-EINVAL);
8033                 } else if (map->pinned) {
8034                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8035                                  bpf_map__name(map), map->pin_path);
8036                         return 0;
8037                 }
8038         } else {
8039                 if (!path) {
8040                         pr_warn("missing a path to pin map '%s' at\n",
8041                                 bpf_map__name(map));
8042                         return libbpf_err(-EINVAL);
8043                 } else if (map->pinned) {
8044                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8045                         return libbpf_err(-EEXIST);
8046                 }
8047
8048                 map->pin_path = strdup(path);
8049                 if (!map->pin_path) {
8050                         err = -errno;
8051                         goto out_err;
8052                 }
8053         }
8054
8055         err = make_parent_dir(map->pin_path);
8056         if (err)
8057                 return libbpf_err(err);
8058
8059         err = check_path(map->pin_path);
8060         if (err)
8061                 return libbpf_err(err);
8062
8063         if (bpf_obj_pin(map->fd, map->pin_path)) {
8064                 err = -errno;
8065                 goto out_err;
8066         }
8067
8068         map->pinned = true;
8069         pr_debug("pinned map '%s'\n", map->pin_path);
8070
8071         return 0;
8072
8073 out_err:
8074         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8075         pr_warn("failed to pin map: %s\n", cp);
8076         return libbpf_err(err);
8077 }
8078
8079 int bpf_map__unpin(struct bpf_map *map, const char *path)
8080 {
8081         int err;
8082
8083         if (map == NULL) {
8084                 pr_warn("invalid map pointer\n");
8085                 return libbpf_err(-EINVAL);
8086         }
8087
8088         if (map->pin_path) {
8089                 if (path && strcmp(path, map->pin_path)) {
8090                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8091                                 bpf_map__name(map), map->pin_path, path);
8092                         return libbpf_err(-EINVAL);
8093                 }
8094                 path = map->pin_path;
8095         } else if (!path) {
8096                 pr_warn("no path to unpin map '%s' from\n",
8097                         bpf_map__name(map));
8098                 return libbpf_err(-EINVAL);
8099         }
8100
8101         err = check_path(path);
8102         if (err)
8103                 return libbpf_err(err);
8104
8105         err = unlink(path);
8106         if (err != 0)
8107                 return libbpf_err(-errno);
8108
8109         map->pinned = false;
8110         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8111
8112         return 0;
8113 }
8114
8115 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8116 {
8117         char *new = NULL;
8118
8119         if (path) {
8120                 new = strdup(path);
8121                 if (!new)
8122                         return libbpf_err(-errno);
8123         }
8124
8125         free(map->pin_path);
8126         map->pin_path = new;
8127         return 0;
8128 }
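
/* Usage sketch (illustrative only): setting a pin path before load makes
 * bpf_object__load() reuse a compatible map already pinned at that path,
 * or create the map and auto-pin it there. The map name is hypothetical.
 *
 *	struct bpf_map *map;
 *
 *	map = bpf_object__find_map_by_name(obj, "counters");
 *	if (map)
 *		bpf_map__set_pin_path(map, "/sys/fs/bpf/counters");
 */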
8129
8130 __alias(bpf_map__pin_path)
8131 const char *bpf_map__get_pin_path(const struct bpf_map *map);
8132
8133 const char *bpf_map__pin_path(const struct bpf_map *map)
8134 {
8135         return map->pin_path;
8136 }
8137
8138 bool bpf_map__is_pinned(const struct bpf_map *map)
8139 {
8140         return map->pinned;
8141 }
8142
8143 static void sanitize_pin_path(char *s)
8144 {
8145         /* bpffs disallows periods in path names */
8146         while (*s) {
8147                 if (*s == '.')
8148                         *s = '_';
8149                 s++;
8150         }
8151 }
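
/* e.g. a map named "mycnt.rodata" (hypothetical) would be pinned as
 * "mycnt_rodata" under the target directory
 */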
8152
8153 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8154 {
8155         struct bpf_map *map;
8156         int err;
8157
8158         if (!obj)
8159                 return libbpf_err(-ENOENT);
8160
8161         if (!obj->loaded) {
8162                 pr_warn("object not yet loaded; load it first\n");
8163                 return libbpf_err(-ENOENT);
8164         }
8165
8166         bpf_object__for_each_map(map, obj) {
8167                 char *pin_path = NULL;
8168                 char buf[PATH_MAX];
8169
8170                 if (!map->autocreate)
8171                         continue;
8172
8173                 if (path) {
8174                         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8175                         if (err)
8176                                 goto err_unpin_maps;
8177                         sanitize_pin_path(buf);
8178                         pin_path = buf;
8179                 } else if (!map->pin_path) {
8180                         continue;
8181                 }
8182
8183                 err = bpf_map__pin(map, pin_path);
8184                 if (err)
8185                         goto err_unpin_maps;
8186         }
8187
8188         return 0;
8189
8190 err_unpin_maps:
8191         while ((map = bpf_object__prev_map(obj, map))) {
8192                 if (!map->pin_path)
8193                         continue;
8194
8195                 bpf_map__unpin(map, NULL);
8196         }
8197
8198         return libbpf_err(err);
8199 }
8200
8201 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8202 {
8203         struct bpf_map *map;
8204         int err;
8205
8206         if (!obj)
8207                 return libbpf_err(-ENOENT);
8208
8209         bpf_object__for_each_map(map, obj) {
8210                 char *pin_path = NULL;
8211                 char buf[PATH_MAX];
8212
8213                 if (path) {
8214                         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8215                         if (err)
8216                                 return libbpf_err(err);
8217                         sanitize_pin_path(buf);
8218                         pin_path = buf;
8219                 } else if (!map->pin_path) {
8220                         continue;
8221                 }
8222
8223                 err = bpf_map__unpin(map, pin_path);
8224                 if (err)
8225                         return libbpf_err(err);
8226         }
8227
8228         return 0;
8229 }
8230
8231 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8232 {
8233         struct bpf_program *prog;
8234         char buf[PATH_MAX];
8235         int err;
8236
8237         if (!obj)
8238                 return libbpf_err(-ENOENT);
8239
8240         if (!obj->loaded) {
8241                 pr_warn("object not yet loaded; load it first\n");
8242                 return libbpf_err(-ENOENT);
8243         }
8244
8245         bpf_object__for_each_program(prog, obj) {
8246                 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8247                 if (err)
8248                         goto err_unpin_programs;
8249
8250                 err = bpf_program__pin(prog, buf);
8251                 if (err)
8252                         goto err_unpin_programs;
8253         }
8254
8255         return 0;
8256
8257 err_unpin_programs:
8258         while ((prog = bpf_object__prev_program(obj, prog))) {
8259                 if (pathname_concat(buf, sizeof(buf), path, prog->name))
8260                         continue;
8261
8262                 bpf_program__unpin(prog, buf);
8263         }
8264
8265         return libbpf_err(err);
8266 }
8267
8268 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8269 {
8270         struct bpf_program *prog;
8271         int err;
8272
8273         if (!obj)
8274                 return libbpf_err(-ENOENT);
8275
8276         bpf_object__for_each_program(prog, obj) {
8277                 char buf[PATH_MAX];
8278
8279                 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8280                 if (err)
8281                         return libbpf_err(err);
8282
8283                 err = bpf_program__unpin(prog, buf);
8284                 if (err)
8285                         return libbpf_err(err);
8286         }
8287
8288         return 0;
8289 }
8290
8291 int bpf_object__pin(struct bpf_object *obj, const char *path)
8292 {
8293         int err;
8294
8295         err = bpf_object__pin_maps(obj, path);
8296         if (err)
8297                 return libbpf_err(err);
8298
8299         err = bpf_object__pin_programs(obj, path);
8300         if (err) {
8301                 bpf_object__unpin_maps(obj, path);
8302                 return libbpf_err(err);
8303         }
8304
8305         return 0;
8306 }
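
/* Usage sketch (illustrative only): pin all maps and programs of an
 * object under one bpffs directory; if pinning programs fails, the maps
 * pinned by this call are unpinned again. The directory is hypothetical.
 *
 *	if (bpf_object__pin(obj, "/sys/fs/bpf/myapp"))
 *		fprintf(stderr, "pin failed: %s\n", strerror(errno));
 */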
8307
8308 static void bpf_map__destroy(struct bpf_map *map)
8309 {
8310         if (map->inner_map) {
8311                 bpf_map__destroy(map->inner_map);
8312                 zfree(&map->inner_map);
8313         }
8314
8315         zfree(&map->init_slots);
8316         map->init_slots_sz = 0;
8317
8318         if (map->mmaped) {
8319                 size_t mmap_sz;
8320
8321                 mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
8322                 munmap(map->mmaped, mmap_sz);
8323                 map->mmaped = NULL;
8324         }
8325
8326         if (map->st_ops) {
8327                 zfree(&map->st_ops->data);
8328                 zfree(&map->st_ops->progs);
8329                 zfree(&map->st_ops->kern_func_off);
8330                 zfree(&map->st_ops);
8331         }
8332
8333         zfree(&map->name);
8334         zfree(&map->real_name);
8335         zfree(&map->pin_path);
8336
8337         if (map->fd >= 0)
8338                 zclose(map->fd);
8339 }
8340
8341 void bpf_object__close(struct bpf_object *obj)
8342 {
8343         size_t i;
8344
8345         if (IS_ERR_OR_NULL(obj))
8346                 return;
8347
8348         usdt_manager_free(obj->usdt_man);
8349         obj->usdt_man = NULL;
8350
8351         bpf_gen__free(obj->gen_loader);
8352         bpf_object__elf_finish(obj);
8353         bpf_object_unload(obj);
8354         btf__free(obj->btf);
8355         btf_ext__free(obj->btf_ext);
8356
8357         for (i = 0; i < obj->nr_maps; i++)
8358                 bpf_map__destroy(&obj->maps[i]);
8359
8360         zfree(&obj->btf_custom_path);
8361         zfree(&obj->kconfig);
8362         zfree(&obj->externs);
8363         obj->nr_extern = 0;
8364
8365         zfree(&obj->maps);
8366         obj->nr_maps = 0;
8367
8368         if (obj->programs && obj->nr_programs) {
8369                 for (i = 0; i < obj->nr_programs; i++)
8370                         bpf_program__exit(&obj->programs[i]);
8371         }
8372         zfree(&obj->programs);
8373
8374         free(obj);
8375 }
8376
8377 const char *bpf_object__name(const struct bpf_object *obj)
8378 {
8379         return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8380 }
8381
8382 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8383 {
8384         return obj ? obj->kern_version : 0;
8385 }
8386
8387 struct btf *bpf_object__btf(const struct bpf_object *obj)
8388 {
8389         return obj ? obj->btf : NULL;
8390 }
8391
8392 int bpf_object__btf_fd(const struct bpf_object *obj)
8393 {
8394         return obj->btf ? btf__fd(obj->btf) : -1;
8395 }
8396
8397 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
8398 {
8399         if (obj->loaded)
8400                 return libbpf_err(-EINVAL);
8401
8402         obj->kern_version = kern_version;
8403
8404         return 0;
8405 }
8406
8407 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8408 {
8409         struct bpf_gen *gen;
8410
8411         if (!opts)
8412                 return -EFAULT;
8413         if (!OPTS_VALID(opts, gen_loader_opts))
8414                 return -EINVAL;
8415         gen = calloc(1, sizeof(*gen));
8416         if (!gen)
8417                 return -ENOMEM;
8418         gen->opts = opts;
8419         obj->gen_loader = gen;
8420         return 0;
8421 }
8422
8423 static struct bpf_program *
8424 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8425                     bool forward)
8426 {
8427         size_t nr_programs = obj->nr_programs;
8428         ssize_t idx;
8429
8430         if (!nr_programs)
8431                 return NULL;
8432
8433         if (!p)
8434                 /* iterate from the beginning */
8435                 return forward ? &obj->programs[0] :
8436                         &obj->programs[nr_programs - 1];
8437
8438         if (p->obj != obj) {
8439                 pr_warn("error: program handle doesn't match object\n");
8440                 return errno = EINVAL, NULL;
8441         }
8442
8443         idx = (p - obj->programs) + (forward ? 1 : -1);
8444         if (idx >= obj->nr_programs || idx < 0)
8445                 return NULL;
8446         return &obj->programs[idx];
8447 }
8448
8449 struct bpf_program *
8450 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
8451 {
8452         struct bpf_program *prog = prev;
8453
8454         do {
8455                 prog = __bpf_program__iter(prog, obj, true);
8456         } while (prog && prog_is_subprog(obj, prog));
8457
8458         return prog;
8459 }
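
/* Usage sketch (illustrative only): the bpf_object__for_each_program()
 * macro from libbpf.h is built on bpf_object__next_program() and, like
 * it, skips subprograms.
 *
 *	struct bpf_program *prog;
 *
 *	bpf_object__for_each_program(prog, obj)
 *		printf("%s (section %s)\n", bpf_program__name(prog),
 *		       bpf_program__section_name(prog));
 */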
8460
8461 struct bpf_program *
8462 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
8463 {
8464         struct bpf_program *prog = next;
8465
8466         do {
8467                 prog = __bpf_program__iter(prog, obj, false);
8468         } while (prog && prog_is_subprog(obj, prog));
8469
8470         return prog;
8471 }
8472
8473 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8474 {
8475         prog->prog_ifindex = ifindex;
8476 }
8477
8478 const char *bpf_program__name(const struct bpf_program *prog)
8479 {
8480         return prog->name;
8481 }
8482
8483 const char *bpf_program__section_name(const struct bpf_program *prog)
8484 {
8485         return prog->sec_name;
8486 }
8487
8488 bool bpf_program__autoload(const struct bpf_program *prog)
8489 {
8490         return prog->autoload;
8491 }
8492
8493 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8494 {
8495         if (prog->obj->loaded)
8496                 return libbpf_err(-EINVAL);
8497
8498         prog->autoload = autoload;
8499         return 0;
8500 }
8501
8502 bool bpf_program__autoattach(const struct bpf_program *prog)
8503 {
8504         return prog->autoattach;
8505 }
8506
8507 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
8508 {
8509         prog->autoattach = autoattach;
8510 }
8511
8512 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
8513 {
8514         return prog->insns;
8515 }
8516
8517 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
8518 {
8519         return prog->insns_cnt;
8520 }
8521
8522 int bpf_program__set_insns(struct bpf_program *prog,
8523                            struct bpf_insn *new_insns, size_t new_insn_cnt)
8524 {
8525         struct bpf_insn *insns;
8526
8527         if (prog->obj->loaded)
8528                 return -EBUSY;
8529
8530         insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
8531         if (!insns) {
8532                 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
8533                 return -ENOMEM;
8534         }
8535         memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
8536
8537         prog->insns = insns;
8538         prog->insns_cnt = new_insn_cnt;
8539         return 0;
8540 }
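
/* Note: bpf_program__set_insns() replaces the program's instructions
 * wholesale and is only valid before the object is loaded (-EBUSY
 * otherwise); the instructions are copied, so the caller keeps ownership
 * of new_insns.
 */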
8541
8542 int bpf_program__fd(const struct bpf_program *prog)
8543 {
8544         if (!prog)
8545                 return libbpf_err(-EINVAL);
8546
8547         if (prog->fd < 0)
8548                 return libbpf_err(-ENOENT);
8549
8550         return prog->fd;
8551 }
8552
8553 __alias(bpf_program__type)
8554 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
8555
8556 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
8557 {
8558         return prog->type;
8559 }
8560
8561 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
8562 {
8563         if (prog->obj->loaded)
8564                 return libbpf_err(-EBUSY);
8565
8566         prog->type = type;
8567         prog->sec_def = NULL;
8568         return 0;
8569 }
8570
8571 __alias(bpf_program__expected_attach_type)
8572 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
8573
8574 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
8575 {
8576         return prog->expected_attach_type;
8577 }
8578
8579 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
8580                                            enum bpf_attach_type type)
8581 {
8582         if (prog->obj->loaded)
8583                 return libbpf_err(-EBUSY);
8584
8585         prog->expected_attach_type = type;
8586         return 0;
8587 }
8588
8589 __u32 bpf_program__flags(const struct bpf_program *prog)
8590 {
8591         return prog->prog_flags;
8592 }
8593
8594 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
8595 {
8596         if (prog->obj->loaded)
8597                 return libbpf_err(-EBUSY);
8598
8599         prog->prog_flags = flags;
8600         return 0;
8601 }
8602
8603 __u32 bpf_program__log_level(const struct bpf_program *prog)
8604 {
8605         return prog->log_level;
8606 }
8607
8608 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
8609 {
8610         if (prog->obj->loaded)
8611                 return libbpf_err(-EBUSY);
8612
8613         prog->log_level = log_level;
8614         return 0;
8615 }
8616
8617 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
8618 {
8619         *log_size = prog->log_size;
8620         return prog->log_buf;
8621 }
8622
8623 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
8624 {
8625         if (log_size && !log_buf)
8626                 return -EINVAL;
8627         if (log_size > UINT_MAX) /* kernel's log_size is a __u32 */
8628                 return -EINVAL;
8629         if (prog->obj->loaded)
8630                 return -EBUSY;
8631
8632         prog->log_buf = log_buf;
8633         prog->log_size = log_size;
8634         return 0;
8635 }
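
/* Usage sketch (illustrative only): capture the verifier log into a
 * caller-owned buffer; the 1 MB size is an arbitrary example.
 *
 *	static char vlog[1024 * 1024];
 *
 *	bpf_program__set_log_buf(prog, vlog, sizeof(vlog));
 *	bpf_program__set_log_level(prog, 1);
 *	if (bpf_object__load(obj))
 *		fprintf(stderr, "verifier said:\n%s\n", vlog);
 */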
8636
8637 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {                        \
8638         .sec = (char *)sec_pfx,                                             \
8639         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
8640         .expected_attach_type = atype,                                      \
8641         .cookie = (long)(flags),                                            \
8642         .prog_prepare_load_fn = libbpf_prepare_prog_load,                   \
8643         __VA_ARGS__                                                         \
8644 }
8645
8646 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8647 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8648 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8649 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8650 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8651 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8652 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8653 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8654 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8655 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8656
8657 static const struct bpf_sec_def section_defs[] = {
8658         SEC_DEF("socket",               SOCKET_FILTER, 0, SEC_NONE),
8659         SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
8660         SEC_DEF("sk_reuseport",         SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
8661         SEC_DEF("kprobe+",              KPROBE, 0, SEC_NONE, attach_kprobe),
8662         SEC_DEF("uprobe+",              KPROBE, 0, SEC_NONE, attach_uprobe),
8663         SEC_DEF("uprobe.s+",            KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8664         SEC_DEF("kretprobe+",           KPROBE, 0, SEC_NONE, attach_kprobe),
8665         SEC_DEF("uretprobe+",           KPROBE, 0, SEC_NONE, attach_uprobe),
8666         SEC_DEF("uretprobe.s+",         KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8667         SEC_DEF("kprobe.multi+",        KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8668         SEC_DEF("kretprobe.multi+",     KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8669         SEC_DEF("ksyscall+",            KPROBE, 0, SEC_NONE, attach_ksyscall),
8670         SEC_DEF("kretsyscall+",         KPROBE, 0, SEC_NONE, attach_ksyscall),
8671         SEC_DEF("usdt+",                KPROBE, 0, SEC_NONE, attach_usdt),
8672         SEC_DEF("tc",                   SCHED_CLS, 0, SEC_NONE),
8673         SEC_DEF("classifier",           SCHED_CLS, 0, SEC_NONE),
8674         SEC_DEF("action",               SCHED_ACT, 0, SEC_NONE),
8675         SEC_DEF("tracepoint+",          TRACEPOINT, 0, SEC_NONE, attach_tp),
8676         SEC_DEF("tp+",                  TRACEPOINT, 0, SEC_NONE, attach_tp),
8677         SEC_DEF("raw_tracepoint+",      RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8678         SEC_DEF("raw_tp+",              RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8679         SEC_DEF("raw_tracepoint.w+",    RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8680         SEC_DEF("raw_tp.w+",            RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8681         SEC_DEF("tp_btf+",              TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
8682         SEC_DEF("fentry+",              TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
8683         SEC_DEF("fmod_ret+",            TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
8684         SEC_DEF("fexit+",               TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
8685         SEC_DEF("fentry.s+",            TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8686         SEC_DEF("fmod_ret.s+",          TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8687         SEC_DEF("fexit.s+",             TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8688         SEC_DEF("freplace+",            EXT, 0, SEC_ATTACH_BTF, attach_trace),
8689         SEC_DEF("lsm+",                 LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
8690         SEC_DEF("lsm.s+",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
8691         SEC_DEF("lsm_cgroup+",          LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
8692         SEC_DEF("iter+",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
8693         SEC_DEF("iter.s+",              TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
8694         SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
8695         SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
8696         SEC_DEF("xdp/devmap",           XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
8697         SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
8698         SEC_DEF("xdp/cpumap",           XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
8699         SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
8700         SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
8701         SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE),
8702         SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE),
8703         SEC_DEF("lwt_out",              LWT_OUT, 0, SEC_NONE),
8704         SEC_DEF("lwt_xmit",             LWT_XMIT, 0, SEC_NONE),
8705         SEC_DEF("lwt_seg6local",        LWT_SEG6LOCAL, 0, SEC_NONE),
8706         SEC_DEF("sockops",              SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
8707         SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
8708         SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
8709         SEC_DEF("sk_skb",               SK_SKB, 0, SEC_NONE),
8710         SEC_DEF("sk_msg",               SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
8711         SEC_DEF("lirc_mode2",           LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
8712         SEC_DEF("flow_dissector",       FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
8713         SEC_DEF("cgroup_skb/ingress",   CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
8714         SEC_DEF("cgroup_skb/egress",    CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
8715         SEC_DEF("cgroup/skb",           CGROUP_SKB, 0, SEC_NONE),
8716         SEC_DEF("cgroup/sock_create",   CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
8717         SEC_DEF("cgroup/sock_release",  CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
8718         SEC_DEF("cgroup/sock",          CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
8719         SEC_DEF("cgroup/post_bind4",    CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
8720         SEC_DEF("cgroup/post_bind6",    CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
8721         SEC_DEF("cgroup/bind4",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
8722         SEC_DEF("cgroup/bind6",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
8723         SEC_DEF("cgroup/connect4",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
8724         SEC_DEF("cgroup/connect6",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
8725         SEC_DEF("cgroup/sendmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
8726         SEC_DEF("cgroup/sendmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
8727         SEC_DEF("cgroup/recvmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
8728         SEC_DEF("cgroup/recvmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
8729         SEC_DEF("cgroup/getpeername4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
8730         SEC_DEF("cgroup/getpeername6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
8731         SEC_DEF("cgroup/getsockname4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
8732         SEC_DEF("cgroup/getsockname6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
8733         SEC_DEF("cgroup/sysctl",        CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
8734         SEC_DEF("cgroup/getsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
8735         SEC_DEF("cgroup/setsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
8736         SEC_DEF("cgroup/dev",           CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
8737         SEC_DEF("struct_ops+",          STRUCT_OPS, 0, SEC_NONE),
8738         SEC_DEF("struct_ops.s+",        STRUCT_OPS, 0, SEC_SLEEPABLE),
8739         SEC_DEF("sk_lookup",            SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
8740         SEC_DEF("netfilter",            NETFILTER, BPF_NETFILTER, SEC_NONE),
8741 };
8742
8743 static size_t custom_sec_def_cnt;
8744 static struct bpf_sec_def *custom_sec_defs;
8745 static struct bpf_sec_def custom_fallback_def;
8746 static bool has_custom_fallback_def;
8747
8748 static int last_custom_sec_def_handler_id;
8749
8750 int libbpf_register_prog_handler(const char *sec,
8751                                  enum bpf_prog_type prog_type,
8752                                  enum bpf_attach_type exp_attach_type,
8753                                  const struct libbpf_prog_handler_opts *opts)
8754 {
8755         struct bpf_sec_def *sec_def;
8756
8757         if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
8758                 return libbpf_err(-EINVAL);
8759
8760         if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
8761                 return libbpf_err(-E2BIG);
8762
8763         if (sec) {
8764                 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
8765                                               sizeof(*sec_def));
8766                 if (!sec_def)
8767                         return libbpf_err(-ENOMEM);
8768
8769                 custom_sec_defs = sec_def;
8770                 sec_def = &custom_sec_defs[custom_sec_def_cnt];
8771         } else {
8772                 if (has_custom_fallback_def)
8773                         return libbpf_err(-EBUSY);
8774
8775                 sec_def = &custom_fallback_def;
8776         }
8777
8778         sec_def->sec = sec ? strdup(sec) : NULL;
8779         if (sec && !sec_def->sec)
8780                 return libbpf_err(-ENOMEM);
8781
8782         sec_def->prog_type = prog_type;
8783         sec_def->expected_attach_type = exp_attach_type;
8784         sec_def->cookie = OPTS_GET(opts, cookie, 0);
8785
8786         sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
8787         sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
8788         sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
8789
8790         sec_def->handler_id = ++last_custom_sec_def_handler_id;
8791
8792         if (sec)
8793                 custom_sec_def_cnt++;
8794         else
8795                 has_custom_fallback_def = true;
8796
8797         return sec_def->handler_id;
8798 }
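
/* Usage sketch (illustrative only): register a custom handler so that a
 * hypothetical SEC("my_tp/...") is treated as a tracepoint program.
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, opts);
 *	int id;
 *
 *	id = libbpf_register_prog_handler("my_tp/", BPF_PROG_TYPE_TRACEPOINT,
 *					  0, &opts);
 *	...
 *	libbpf_unregister_prog_handler(id);
 */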
8799
8800 int libbpf_unregister_prog_handler(int handler_id)
8801 {
8802         struct bpf_sec_def *sec_defs;
8803         int i;
8804
8805         if (handler_id <= 0)
8806                 return libbpf_err(-EINVAL);
8807
8808         if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
8809                 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
8810                 has_custom_fallback_def = false;
8811                 return 0;
8812         }
8813
8814         for (i = 0; i < custom_sec_def_cnt; i++) {
8815                 if (custom_sec_defs[i].handler_id == handler_id)
8816                         break;
8817         }
8818
8819         if (i == custom_sec_def_cnt)
8820                 return libbpf_err(-ENOENT);
8821
8822         free(custom_sec_defs[i].sec);
8823         for (i = i + 1; i < custom_sec_def_cnt; i++)
8824                 custom_sec_defs[i - 1] = custom_sec_defs[i];
8825         custom_sec_def_cnt--;
8826
8827         /* try to shrink the array, but it's ok if we can't */
8828         sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
8829         if (sec_defs)
8830                 custom_sec_defs = sec_defs;
8831
8832         return 0;
8833 }
8834
8835 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
8836 {
8837         size_t len = strlen(sec_def->sec);
8838
8839         /* "type/" always has to have proper SEC("type/extras") form */
8840         if (sec_def->sec[len - 1] == '/') {
8841                 if (str_has_pfx(sec_name, sec_def->sec))
8842                         return true;
8843                 return false;
8844         }
8845
8846         /* "type+" means it can be either exact SEC("type") or
8847          * well-formed SEC("type/extras") with proper '/' separator
8848          */
8849         if (sec_def->sec[len - 1] == '+') {
8850                 len--;
8851                 /* not even a prefix */
8852                 if (strncmp(sec_name, sec_def->sec, len) != 0)
8853                         return false;
8854                 /* exact match or has '/' separator */
8855                 if (sec_name[len] == '\0' || sec_name[len] == '/')
8856                         return true;
8857                 return false;
8858         }
8859
8860         return strcmp(sec_name, sec_def->sec) == 0;
8861 }
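
/* Matching examples (illustrative): a "type+" definition such as
 * "kprobe+" matches SEC("kprobe") and SEC("kprobe/sys_nanosleep"), but
 * not SEC("kprobes"); a plain definition such as "tc" matches only the
 * exact SEC("tc"); a trailing-'/' definition would match any section
 * name starting with that prefix.
 */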
8862
8863 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
8864 {
8865         const struct bpf_sec_def *sec_def;
8866         int i, n;
8867
8868         n = custom_sec_def_cnt;
8869         for (i = 0; i < n; i++) {
8870                 sec_def = &custom_sec_defs[i];
8871                 if (sec_def_matches(sec_def, sec_name))
8872                         return sec_def;
8873         }
8874
8875         n = ARRAY_SIZE(section_defs);
8876         for (i = 0; i < n; i++) {
8877                 sec_def = &section_defs[i];
8878                 if (sec_def_matches(sec_def, sec_name))
8879                         return sec_def;
8880         }
8881
8882         if (has_custom_fallback_def)
8883                 return &custom_fallback_def;
8884
8885         return NULL;
8886 }
8887
8888 #define MAX_TYPE_NAME_SIZE 32
8889
8890 static char *libbpf_get_type_names(bool attach_type)
8891 {
8892         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
8893         char *buf;
8894
8895         buf = malloc(len);
8896         if (!buf)
8897                 return NULL;
8898
8899         buf[0] = '\0';
8900         /* Build a string buf with all available names */
8901         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8902                 const struct bpf_sec_def *sec_def = &section_defs[i];
8903
8904                 if (attach_type) {
8905                         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
8906                                 continue;
8907
8908                         if (!(sec_def->cookie & SEC_ATTACHABLE))
8909                                 continue;
8910                 }
8911
8912                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
8913                         free(buf);
8914                         return NULL;
8915                 }
8916                 strcat(buf, " ");
8917                 strcat(buf, section_defs[i].sec);
8918         }
8919
8920         return buf;
8921 }
8922
8923 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
8924                              enum bpf_attach_type *expected_attach_type)
8925 {
8926         const struct bpf_sec_def *sec_def;
8927         char *type_names;
8928
8929         if (!name)
8930                 return libbpf_err(-EINVAL);
8931
8932         sec_def = find_sec_def(name);
8933         if (sec_def) {
8934                 *prog_type = sec_def->prog_type;
8935                 *expected_attach_type = sec_def->expected_attach_type;
8936                 return 0;
8937         }
8938
8939         pr_debug("failed to guess program type from ELF section '%s'\n", name);
8940         type_names = libbpf_get_type_names(false);
8941         if (type_names != NULL) {
8942                 pr_debug("supported section(type) names are:%s\n", type_names);
8943                 free(type_names);
8944         }
8945
8946         return libbpf_err(-ESRCH);
8947 }
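
/* Usage sketch (illustrative only): given the section_defs table above,
 * "xdp" resolves to BPF_PROG_TYPE_XDP with attach type BPF_XDP.
 *
 *	enum bpf_prog_type ptype;
 *	enum bpf_attach_type atype;
 *	int err;
 *
 *	err = libbpf_prog_type_by_name("xdp", &ptype, &atype);
 */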
8948
8949 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
8950 {
8951         if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
8952                 return NULL;
8953
8954         return attach_type_name[t];
8955 }
8956
8957 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
8958 {
8959         if (t < 0 || t >= ARRAY_SIZE(link_type_name))
8960                 return NULL;
8961
8962         return link_type_name[t];
8963 }
8964
8965 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
8966 {
8967         if (t < 0 || t >= ARRAY_SIZE(map_type_name))
8968                 return NULL;
8969
8970         return map_type_name[t];
8971 }
8972
8973 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
8974 {
8975         if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
8976                 return NULL;
8977
8978         return prog_type_name[t];
8979 }
8980
8981 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
8982                                                      int sec_idx,
8983                                                      size_t offset)
8984 {
8985         struct bpf_map *map;
8986         size_t i;
8987
8988         for (i = 0; i < obj->nr_maps; i++) {
8989                 map = &obj->maps[i];
8990                 if (!bpf_map__is_struct_ops(map))
8991                         continue;
8992                 if (map->sec_idx == sec_idx &&
8993                     map->sec_offset <= offset &&
8994                     offset - map->sec_offset < map->def.value_size)
8995                         return map;
8996         }
8997
8998         return NULL;
8999 }
9000
9001 /* Collect relocations from the ELF section and populate st_ops->progs[] */
9002 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
9003                                             Elf64_Shdr *shdr, Elf_Data *data)
9004 {
9005         const struct btf_member *member;
9006         struct bpf_struct_ops *st_ops;
9007         struct bpf_program *prog;
9008         unsigned int shdr_idx;
9009         const struct btf *btf;
9010         struct bpf_map *map;
9011         unsigned int moff, insn_idx;
9012         const char *name;
9013         __u32 member_idx;
9014         Elf64_Sym *sym;
9015         Elf64_Rel *rel;
9016         int i, nrels;
9017
9018         btf = obj->btf;
9019         nrels = shdr->sh_size / shdr->sh_entsize;
9020         for (i = 0; i < nrels; i++) {
9021                 rel = elf_rel_by_idx(data, i);
9022                 if (!rel) {
9023                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
9024                         return -LIBBPF_ERRNO__FORMAT;
9025                 }
9026
9027                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
9028                 if (!sym) {
9029                         pr_warn("struct_ops reloc: symbol %zx not found\n",
9030                                 (size_t)ELF64_R_SYM(rel->r_info));
9031                         return -LIBBPF_ERRNO__FORMAT;
9032                 }
9033
9034                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
9035                 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
9036                 if (!map) {
9037                         pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
9038                                 (size_t)rel->r_offset);
9039                         return -EINVAL;
9040                 }
9041
9042                 moff = rel->r_offset - map->sec_offset;
9043                 shdr_idx = sym->st_shndx;
9044                 st_ops = map->st_ops;
9045                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
9046                          map->name,
9047                          (long long)(rel->r_info >> 32),
9048                          (long long)sym->st_value,
9049                          shdr_idx, (size_t)rel->r_offset,
9050                          map->sec_offset, sym->st_name, name);
9051
9052                 if (shdr_idx >= SHN_LORESERVE) {
9053                         pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
9054                                 map->name, (size_t)rel->r_offset, shdr_idx);
9055                         return -LIBBPF_ERRNO__RELOC;
9056                 }
9057                 if (sym->st_value % BPF_INSN_SZ) {
9058                         pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9059                                 map->name, (unsigned long long)sym->st_value);
9060                         return -LIBBPF_ERRNO__FORMAT;
9061                 }
9062                 insn_idx = sym->st_value / BPF_INSN_SZ;
9063
9064                 member = find_member_by_offset(st_ops->type, moff * 8);
9065                 if (!member) {
9066                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9067                                 map->name, moff);
9068                         return -EINVAL;
9069                 }
9070                 member_idx = member - btf_members(st_ops->type);
9071                 name = btf__name_by_offset(btf, member->name_off);
9072
9073                 if (!resolve_func_ptr(btf, member->type, NULL)) {
9074                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
9075                                 map->name, name);
9076                         return -EINVAL;
9077                 }
9078
9079                 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
9080                 if (!prog) {
9081                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
9082                                 map->name, shdr_idx, name);
9083                         return -EINVAL;
9084                 }
9085
9086                 /* prevent the use of a BPF prog with an invalid type */
9087                 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
9088                         pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
9089                                 map->name, prog->name);
9090                         return -EINVAL;
9091                 }
9092
9093                 /* if we haven't yet processed this BPF program, record proper
9094                  * attach_btf_id and member_idx
9095                  */
9096                 if (!prog->attach_btf_id) {
9097                         prog->attach_btf_id = st_ops->type_id;
9098                         prog->expected_attach_type = member_idx;
9099                 }
9100
9101                 /* a struct_ops BPF prog can be re-used across multiple
9102                  * .struct_ops and .struct_ops.link maps as long as it is
9103                  * used for the same struct_ops struct definition and the
9104                  * same function pointer field
9105                  */
9106                 if (prog->attach_btf_id != st_ops->type_id ||
9107                     prog->expected_attach_type != member_idx) {
9108                         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
9109                                 map->name, prog->name, prog->sec_name, prog->type,
9110                                 prog->attach_btf_id, prog->expected_attach_type, name);
9111                         return -EINVAL;
9112                 }
9113
9114                 st_ops->progs[member_idx] = prog;
9115         }
9116
9117         return 0;
9118 }
9119
9120 #define BTF_TRACE_PREFIX "btf_trace_"
9121 #define BTF_LSM_PREFIX "bpf_lsm_"
9122 #define BTF_ITER_PREFIX "bpf_iter_"
9123 #define BTF_MAX_NAME_SIZE 128
9124
9125 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
9126                                 const char **prefix, int *kind)
9127 {
9128         switch (attach_type) {
9129         case BPF_TRACE_RAW_TP:
9130                 *prefix = BTF_TRACE_PREFIX;
9131                 *kind = BTF_KIND_TYPEDEF;
9132                 break;
9133         case BPF_LSM_MAC:
9134         case BPF_LSM_CGROUP:
9135                 *prefix = BTF_LSM_PREFIX;
9136                 *kind = BTF_KIND_FUNC;
9137                 break;
9138         case BPF_TRACE_ITER:
9139                 *prefix = BTF_ITER_PREFIX;
9140                 *kind = BTF_KIND_FUNC;
9141                 break;
9142         default:
9143                 *prefix = "";
9144                 *kind = BTF_KIND_FUNC;
9145         }
9146 }
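
/* Examples (illustrative): for BPF_TRACE_RAW_TP the attach name
 * "sched_switch" is looked up as the TYPEDEF "btf_trace_sched_switch";
 * for BPF_LSM_MAC, "file_open" as the FUNC "bpf_lsm_file_open"; for
 * BPF_TRACE_FENTRY and friends, the name is looked up as a plain FUNC.
 */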
9147
9148 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
9149                                    const char *name, __u32 kind)
9150 {
9151         char btf_type_name[BTF_MAX_NAME_SIZE];
9152         int ret;
9153
9154         ret = snprintf(btf_type_name, sizeof(btf_type_name),
9155                        "%s%s", prefix, name);
9156         /* snprintf returns the number of characters that would have been
9157          * written, excluding the terminating null. So a return value
9158          * >= BTF_MAX_NAME_SIZE indicates truncation.
9159          */
9160         if (ret < 0 || ret >= sizeof(btf_type_name))
9161                 return -ENAMETOOLONG;
9162         return btf__find_by_name_kind(btf, btf_type_name, kind);
9163 }
9164
9165 static inline int find_attach_btf_id(struct btf *btf, const char *name,
9166                                      enum bpf_attach_type attach_type)
9167 {
9168         const char *prefix;
9169         int kind;
9170
9171         btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
9172         return find_btf_by_prefix_kind(btf, prefix, name, kind);
9173 }
9174
9175 int libbpf_find_vmlinux_btf_id(const char *name,
9176                                enum bpf_attach_type attach_type)
9177 {
9178         struct btf *btf;
9179         int err;
9180
9181         btf = btf__load_vmlinux_btf();
9182         err = libbpf_get_error(btf);
9183         if (err) {
9184                 pr_warn("vmlinux BTF is not found\n");
9185                 return libbpf_err(err);
9186         }
9187
9188         err = find_attach_btf_id(btf, name, attach_type);
9189         if (err <= 0)
9190                 pr_warn("%s is not found in vmlinux BTF\n", name);
9191
9192         btf__free(btf);
9193         return libbpf_err(err);
9194 }
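
/* Usage sketch (illustrative only): find the vmlinux BTF ID an fentry
 * program attaching to a (hypothetical) kernel function would target.
 *
 *	int btf_id;
 *
 *	btf_id = libbpf_find_vmlinux_btf_id("tcp_v4_connect",
 *					    BPF_TRACE_FENTRY);
 */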
9195
9196 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9197 {
9198         struct bpf_prog_info info;
9199         __u32 info_len = sizeof(info);
9200         struct btf *btf;
9201         int err;
9202
9203         memset(&info, 0, info_len);
9204         err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
9205         if (err) {
9206                 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
9207                         attach_prog_fd, err);
9208                 return err;
9209         }
9210
9211         err = -EINVAL;
9212         if (!info.btf_id) {
9213                 pr_warn("The target program doesn't have BTF\n");
9214                 goto out;
9215         }
9216         btf = btf__load_from_kernel_by_id(info.btf_id);
9217         err = libbpf_get_error(btf);
9218         if (err) {
9219                 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
9220                 goto out;
9221         }
9222         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9223         btf__free(btf);
9224         if (err <= 0) {
9225                 pr_warn("%s is not found in prog's BTF\n", name);
9226                 goto out;
9227         }
9228 out:
9229         return err;
9230 }
9231
9232 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9233                               enum bpf_attach_type attach_type,
9234                               int *btf_obj_fd, int *btf_type_id)
9235 {
9236         int ret, i;
9237
9238         ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9239         if (ret > 0) {
9240                 *btf_obj_fd = 0; /* vmlinux BTF */
9241                 *btf_type_id = ret;
9242                 return 0;
9243         }
9244         if (ret != -ENOENT)
9245                 return ret;
9246
9247         ret = load_module_btfs(obj);
9248         if (ret)
9249                 return ret;
9250
9251         for (i = 0; i < obj->btf_module_cnt; i++) {
9252                 const struct module_btf *mod = &obj->btf_modules[i];
9253
9254                 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9255                 if (ret > 0) {
9256                         *btf_obj_fd = mod->fd;
9257                         *btf_type_id = ret;
9258                         return 0;
9259                 }
9260                 if (ret == -ENOENT)
9261                         continue;
9262
9263                 return ret;
9264         }
9265
9266         return -ESRCH;
9267 }
9268
9269 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
9270                                      int *btf_obj_fd, int *btf_type_id)
9271 {
9272         enum bpf_attach_type attach_type = prog->expected_attach_type;
9273         __u32 attach_prog_fd = prog->attach_prog_fd;
9274         int err = 0;
9275
9276         /* BPF program's BTF ID */
9277         if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
9278                 if (!attach_prog_fd) {
9279                         pr_warn("prog '%s': attach program FD is not set\n", prog->name);
9280                         return -EINVAL;
9281                 }
9282                 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9283                 if (err < 0) {
9284                         pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9285                                  prog->name, attach_prog_fd, attach_name, err);
9286                         return err;
9287                 }
9288                 *btf_obj_fd = 0;
9289                 *btf_type_id = err;
9290                 return 0;
9291         }
9292
9293         /* kernel/module BTF ID */
9294         if (prog->obj->gen_loader) {
9295                 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9296                 *btf_obj_fd = 0;
9297                 *btf_type_id = 1;
9298         } else {
9299                 err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
9300         }
9301         if (err) {
9302                 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
9303                         prog->name, attach_name, err);
9304                 return err;
9305         }
9306         return 0;
9307 }
9308
9309 int libbpf_attach_type_by_name(const char *name,
9310                                enum bpf_attach_type *attach_type)
9311 {
9312         char *type_names;
9313         const struct bpf_sec_def *sec_def;
9314
9315         if (!name)
9316                 return libbpf_err(-EINVAL);
9317
9318         sec_def = find_sec_def(name);
9319         if (!sec_def) {
9320                 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9321                 type_names = libbpf_get_type_names(true);
9322                 if (type_names != NULL) {
9323                         pr_debug("attachable section(type) names are:%s\n", type_names);
9324                         free(type_names);
9325                 }
9326
9327                 return libbpf_err(-EINVAL);
9328         }
9329
9330         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9331                 return libbpf_err(-EINVAL);
9332         if (!(sec_def->cookie & SEC_ATTACHABLE))
9333                 return libbpf_err(-EINVAL);
9334
9335         *attach_type = sec_def->expected_attach_type;
9336         return 0;
9337 }
9338
9339 int bpf_map__fd(const struct bpf_map *map)
9340 {
9341         return map ? map->fd : libbpf_err(-EINVAL);
9342 }
9343
9344 static bool map_uses_real_name(const struct bpf_map *map)
9345 {
9346         /* Since libbpf started supporting custom .data.* and .rodata.* maps,
9347          * a map's user-visible name can differ from its kernel-visible name.
9348          * Users see such a map's corresponding ELF section name as the map name.
9349          * This check distinguishes .data/.rodata from .data.* and .rodata.*
9350          * maps to know which name has to be returned to the user.
9351          */
9352         if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
9353                 return true;
9354         if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
9355                 return true;
9356         return false;
9357 }
9358
9359 const char *bpf_map__name(const struct bpf_map *map)
9360 {
9361         if (!map)
9362                 return NULL;
9363
9364         if (map_uses_real_name(map))
9365                 return map->real_name;
9366
9367         return map->name;
9368 }
9369
9370 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9371 {
9372         return map->def.type;
9373 }
9374
9375 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9376 {
9377         if (map->fd >= 0)
9378                 return libbpf_err(-EBUSY);
9379         map->def.type = type;
9380         return 0;
9381 }
9382
9383 __u32 bpf_map__map_flags(const struct bpf_map *map)
9384 {
9385         return map->def.map_flags;
9386 }
9387
9388 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9389 {
9390         if (map->fd >= 0)
9391                 return libbpf_err(-EBUSY);
9392         map->def.map_flags = flags;
9393         return 0;
9394 }
9395
9396 __u64 bpf_map__map_extra(const struct bpf_map *map)
9397 {
9398         return map->map_extra;
9399 }
9400
9401 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
9402 {
9403         if (map->fd >= 0)
9404                 return libbpf_err(-EBUSY);
9405         map->map_extra = map_extra;
9406         return 0;
9407 }
9408
9409 __u32 bpf_map__numa_node(const struct bpf_map *map)
9410 {
9411         return map->numa_node;
9412 }
9413
9414 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9415 {
9416         if (map->fd >= 0)
9417                 return libbpf_err(-EBUSY);
9418         map->numa_node = numa_node;
9419         return 0;
9420 }
9421
9422 __u32 bpf_map__key_size(const struct bpf_map *map)
9423 {
9424         return map->def.key_size;
9425 }
9426
9427 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9428 {
9429         if (map->fd >= 0)
9430                 return libbpf_err(-EBUSY);
9431         map->def.key_size = size;
9432         return 0;
9433 }
9434
9435 __u32 bpf_map__value_size(const struct bpf_map *map)
9436 {
9437         return map->def.value_size;
9438 }
9439
9440 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
9441 {
9442         struct btf *btf;
9443         struct btf_type *datasec_type, *var_type;
9444         struct btf_var_secinfo *var;
9445         const struct btf_type *array_type;
9446         const struct btf_array *array;
9447         int vlen, element_sz, new_array_id;
9448         __u32 nr_elements;
9449
9450         /* check btf existence */
9451         btf = bpf_object__btf(map->obj);
9452         if (!btf)
9453                 return -ENOENT;
9454
9455         /* verify map is datasec */
9456         datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
9457         if (!btf_is_datasec(datasec_type)) {
9458                 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
9459                         bpf_map__name(map));
9460                 return -EINVAL;
9461         }
9462
9463         /* verify datasec has at least one var */
9464         vlen = btf_vlen(datasec_type);
9465         if (vlen == 0) {
9466                 pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
9467                         bpf_map__name(map));
9468                 return -EINVAL;
9469         }
9470
9471         /* verify last var in the datasec is an array */
9472         var = &btf_var_secinfos(datasec_type)[vlen - 1];
9473         var_type = btf_type_by_id(btf, var->type);
9474         array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
9475         if (!btf_is_array(array_type)) {
9476                 pr_warn("map '%s': cannot be resized, last var must be an array\n",
9477                         bpf_map__name(map));
9478                 return -EINVAL;
9479         }
9480
9481         /* verify request size aligns with array */
9482         array = btf_array(array_type);
9483         element_sz = btf__resolve_size(btf, array->type);
9484         if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
9485                 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
9486                         bpf_map__name(map), element_sz, size);
9487                 return -EINVAL;
9488         }
9489
9490         /* create a new array based on the existing array, but with new length */
9491         nr_elements = (size - var->offset) / element_sz;
9492         new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
9493         if (new_array_id < 0)
9494                 return new_array_id;
9495
9496         /* adding a new btf type invalidates existing pointers to btf objects,
9497          * so refresh pointers before proceeding
9498          */
9499         datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
9500         var = &btf_var_secinfos(datasec_type)[vlen - 1];
9501         var_type = btf_type_by_id(btf, var->type);
9502
9503         /* finally update btf info */
9504         datasec_type->size = size;
9505         var->size = size - var->offset;
9506         var_type->type = new_array_id;
9507
9508         return 0;
9509 }
9510
9511 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9512 {
9513         if (map->fd >= 0)
9514                 return libbpf_err(-EBUSY);
9515
9516         if (map->mmaped) {
9517                 int err;
9518                 size_t mmap_old_sz, mmap_new_sz;
9519
9520                 mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
9521                 mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
9522                 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
9523                 if (err) {
9524                         pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
9525                                 bpf_map__name(map), err);
9526                         return err;
9527                 }
9528                 err = map_btf_datasec_resize(map, size);
9529                 if (err && err != -ENOENT) {
9530                         pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
9531                                 bpf_map__name(map), err);
9532                         map->btf_value_type_id = 0;
9533                         map->btf_key_type_id = 0;
9534                 }
9535         }
9536
9537         map->def.value_size = size;
9538         return 0;
9539 }
9540
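/* Usage sketch (illustrative, not an official libbpf sample): growing a
 * global data map before load. The map name ".data.my_buf" and the new size
 * are assumptions; this works for any mmap-able map whose BTF datasec ends
 * in an array, and must run after bpf_object__open() but before load:
 *
 *        struct bpf_map *map = bpf_object__find_map_by_name(obj, ".data.my_buf");
 *
 *        if (map)
 *                err = bpf_map__set_value_size(map, 64 * 1024);
 *
 * On success, the memory-mapped region is resized and the trailing BTF
 * array is rebuilt to match the new size.
 */
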
9541 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9542 {
9543         return map ? map->btf_key_type_id : 0;
9544 }
9545
9546 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9547 {
9548         return map ? map->btf_value_type_id : 0;
9549 }
9550
9551 int bpf_map__set_initial_value(struct bpf_map *map,
9552                                const void *data, size_t size)
9553 {
9554         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9555             size != map->def.value_size || map->fd >= 0)
9556                 return libbpf_err(-EINVAL);
9557
9558         memcpy(map->mmaped, data, size);
9559         return 0;
9560 }
9561
9562 void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
9563 {
9564         if (!map->mmaped)
9565                 return NULL;
9566         *psize = map->def.value_size;
9567         return map->mmaped;
9568 }
9569
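/* Usage sketch (illustrative): seeding an internal map's contents before
 * load and reading them back. "struct my_cfg" is an assumed type mirroring
 * the map's value layout; the size must equal the map's value_size:
 *
 *        struct my_cfg cfg = { .verbose = true };
 *        size_t sz;
 *        void *data;
 *
 *        err = bpf_map__set_initial_value(map, &cfg, sizeof(cfg));
 *        data = bpf_map__initial_value(map, &sz);
 */
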
9570 bool bpf_map__is_internal(const struct bpf_map *map)
9571 {
9572         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
9573 }
9574
9575 __u32 bpf_map__ifindex(const struct bpf_map *map)
9576 {
9577         return map->map_ifindex;
9578 }
9579
9580 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
9581 {
9582         if (map->fd >= 0)
9583                 return libbpf_err(-EBUSY);
9584         map->map_ifindex = ifindex;
9585         return 0;
9586 }
9587
9588 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
9589 {
9590         if (!bpf_map_type__is_map_in_map(map->def.type)) {
9591                 pr_warn("error: unsupported map type\n");
9592                 return libbpf_err(-EINVAL);
9593         }
9594         if (map->inner_map_fd != -1) {
9595                 pr_warn("error: inner_map_fd already specified\n");
9596                 return libbpf_err(-EINVAL);
9597         }
9598         if (map->inner_map) {
9599                 bpf_map__destroy(map->inner_map);
9600                 zfree(&map->inner_map);
9601         }
9602         map->inner_map_fd = fd;
9603         return 0;
9604 }
9605
9606 static struct bpf_map *
9607 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
9608 {
9609         ssize_t idx;
9610         struct bpf_map *s, *e;
9611
9612         if (!obj || !obj->maps)
9613                 return errno = EINVAL, NULL;
9614
9615         s = obj->maps;
9616         e = obj->maps + obj->nr_maps;
9617
9618         if ((m < s) || (m >= e)) {
9619                 pr_warn("error in %s: map handle doesn't belong to object\n",
9620                          __func__);
9621                 return errno = EINVAL, NULL;
9622         }
9623
9624         idx = (m - obj->maps) + i;
9625         if (idx >= obj->nr_maps || idx < 0)
9626                 return NULL;
9627         return &obj->maps[idx];
9628 }
9629
9630 struct bpf_map *
9631 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
9632 {
9633         if (prev == NULL)
9634                 return obj->maps;
9635
9636         return __bpf_map__iter(prev, obj, 1);
9637 }
9638
9639 struct bpf_map *
9640 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
9641 {
9642         if (next == NULL) {
9643                 if (!obj->nr_maps)
9644                         return NULL;
9645                 return obj->maps + obj->nr_maps - 1;
9646         }
9647
9648         return __bpf_map__iter(next, obj, -1);
9649 }
9650
9651 struct bpf_map *
9652 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
9653 {
9654         struct bpf_map *pos;
9655
9656         bpf_object__for_each_map(pos, obj) {
9657                 /* if it's a special internal map name (which always starts
9658                  * with dot) then check if that special name matches the
9659                  * real map name (ELF section name)
9660                  */
9661                 if (name[0] == '.') {
9662                         if (pos->real_name && strcmp(pos->real_name, name) == 0)
9663                                 return pos;
9664                         continue;
9665                 }
9666                 /* otherwise map name has to be an exact match */
9667                 if (map_uses_real_name(pos)) {
9668                         if (strcmp(pos->real_name, name) == 0)
9669                                 return pos;
9670                         continue;
9671                 }
9672                 if (strcmp(pos->name, name) == 0)
9673                         return pos;
9674         }
9675         return errno = ENOENT, NULL;
9676 }
9677
9678 int
9679 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
9680 {
9681         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
9682 }
9683
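/* Usage sketch (illustrative): internal maps are looked up by their ELF
 * section name when the query starts with a dot, otherwise by map name:
 *
 *        struct bpf_map *rodata = bpf_object__find_map_by_name(obj, ".rodata");
 *        int fd = bpf_object__find_map_fd_by_name(obj, ".rodata");
 *
 * On failure the former returns NULL with errno set to ENOENT, and the
 * latter returns a negative error code.
 */
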
9684 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
9685                            size_t value_sz, bool check_value_sz)
9686 {
9687         if (map->fd <= 0)
9688                 return -ENOENT;
9689
9690         if (map->def.key_size != key_sz) {
9691                 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
9692                         map->name, key_sz, map->def.key_size);
9693                 return -EINVAL;
9694         }
9695
9696         if (!check_value_sz)
9697                 return 0;
9698
9699         switch (map->def.type) {
9700         case BPF_MAP_TYPE_PERCPU_ARRAY:
9701         case BPF_MAP_TYPE_PERCPU_HASH:
9702         case BPF_MAP_TYPE_LRU_PERCPU_HASH:
9703         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
9704                 int num_cpu = libbpf_num_possible_cpus();
9705                 size_t elem_sz = roundup(map->def.value_size, 8);
9706
9707                 if (value_sz != num_cpu * elem_sz) {
9708                         pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
9709                                 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
9710                         return -EINVAL;
9711                 }
9712                 break;
9713         }
9714         default:
9715                 if (map->def.value_size != value_sz) {
9716                         pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
9717                                 map->name, value_sz, map->def.value_size);
9718                         return -EINVAL;
9719                 }
9720                 break;
9721         }
9722         return 0;
9723 }
9724
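/* Worked example of the per-CPU sizing rule above (illustrative numbers):
 * for a BPF_MAP_TYPE_PERCPU_ARRAY with value_size == 4 on a system where
 * libbpf_num_possible_cpus() == 4, each CPU's value occupies an
 * 8-byte-aligned slot, so the caller must supply
 *
 *        4 * roundup(4, 8) == 32 bytes
 *
 * e.g. a __u64 vals[4] buffer, even though each logical value is only
 * 4 bytes.
 */
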
9725 int bpf_map__lookup_elem(const struct bpf_map *map,
9726                          const void *key, size_t key_sz,
9727                          void *value, size_t value_sz, __u64 flags)
9728 {
9729         int err;
9730
9731         err = validate_map_op(map, key_sz, value_sz, true);
9732         if (err)
9733                 return libbpf_err(err);
9734
9735         return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
9736 }
9737
9738 int bpf_map__update_elem(const struct bpf_map *map,
9739                          const void *key, size_t key_sz,
9740                          const void *value, size_t value_sz, __u64 flags)
9741 {
9742         int err;
9743
9744         err = validate_map_op(map, key_sz, value_sz, true);
9745         if (err)
9746                 return libbpf_err(err);
9747
9748         return bpf_map_update_elem(map->fd, key, value, flags);
9749 }
9750
9751 int bpf_map__delete_elem(const struct bpf_map *map,
9752                          const void *key, size_t key_sz, __u64 flags)
9753 {
9754         int err;
9755
9756         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9757         if (err)
9758                 return libbpf_err(err);
9759
9760         return bpf_map_delete_elem_flags(map->fd, key, flags);
9761 }
9762
9763 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
9764                                     const void *key, size_t key_sz,
9765                                     void *value, size_t value_sz, __u64 flags)
9766 {
9767         int err;
9768
9769         err = validate_map_op(map, key_sz, value_sz, true);
9770         if (err)
9771                 return libbpf_err(err);
9772
9773         return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
9774 }
9775
9776 int bpf_map__get_next_key(const struct bpf_map *map,
9777                           const void *cur_key, void *next_key, size_t key_sz)
9778 {
9779         int err;
9780
9781         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9782         if (err)
9783                 return libbpf_err(err);
9784
9785         return bpf_map_get_next_key(map->fd, cur_key, next_key);
9786 }
9787
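/* Usage sketch (illustrative): iterating all keys of a map. Passing NULL
 * as cur_key starts iteration; a negative return (-ENOENT) ends it:
 *
 *        __u32 key, next;
 *        void *cur = NULL;
 *
 *        while (bpf_map__get_next_key(map, cur, &next, sizeof(next)) == 0) {
 *                key = next;
 *                cur = &key;
 *        }
 */
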
9788 long libbpf_get_error(const void *ptr)
9789 {
9790         if (!IS_ERR_OR_NULL(ptr))
9791                 return 0;
9792
9793         if (IS_ERR(ptr))
9794                 errno = -PTR_ERR(ptr);
9795
9796         /* If ptr == NULL, then errno should already be set by the failing
9797          * API, because libbpf never returns NULL on success and now always
9798          * sets errno on error. So no extra errno handling is needed for the
9799          * ptr == NULL case.
9800          */
9801         return -errno;
9802 }
9803
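/* Usage sketch (illustrative): normalizing a pointer-returning API result.
 * With libbpf 1.0 semantics a NULL check plus errno is enough, but callers
 * that may still deal with ERR_PTR-encoded pointers can use:
 *
 *        struct bpf_link *link = bpf_program__attach(prog);
 *        long err = libbpf_get_error(link);
 *
 * A non-zero err means link is not usable and err is a negative errno code.
 */
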
9804 /* Replace link's underlying BPF program with the new one */
9805 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
9806 {
9807         int ret;
9808
9809         ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
9810         return libbpf_err_errno(ret);
9811 }
9812
9813 /* Release "ownership" of the underlying BPF resource (typically, a BPF
9814  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
9815  * disconnected link, when destructed through bpf_link__destroy(), won't
9816  * attempt to detach/unregister that BPF resource. This is useful in
9817  * situations where, say, an attached BPF program has to outlive the
9818  * userspace program that attached it. Depending on the type of BPF program,
9819  * though, there might be additional steps (like pinning the BPF program in
9820  * BPF FS) necessary to ensure that the exit of the userspace program
9821  * doesn't trigger automatic detachment and cleanup inside the kernel.
9822  */
9823 void bpf_link__disconnect(struct bpf_link *link)
9824 {
9825         link->disconnected = true;
9826 }
9827
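/* Usage sketch (illustrative): keeping an attachment alive past process
 * exit by pinning the link first. "/sys/fs/bpf/my_link" is an assumed
 * BPF FS pin path:
 *
 *        err = bpf_link__pin(link, "/sys/fs/bpf/my_link");
 *        bpf_link__disconnect(link);
 *        err = bpf_link__destroy(link);
 *
 * destroy() then only frees memory; the kernel-side attachment persists
 * until the pin is removed.
 */
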
9828 int bpf_link__destroy(struct bpf_link *link)
9829 {
9830         int err = 0;
9831
9832         if (IS_ERR_OR_NULL(link))
9833                 return 0;
9834
9835         if (!link->disconnected && link->detach)
9836                 err = link->detach(link);
9837         if (link->pin_path)
9838                 free(link->pin_path);
9839         if (link->dealloc)
9840                 link->dealloc(link);
9841         else
9842                 free(link);
9843
9844         return libbpf_err(err);
9845 }
9846
9847 int bpf_link__fd(const struct bpf_link *link)
9848 {
9849         return link->fd;
9850 }
9851
9852 const char *bpf_link__pin_path(const struct bpf_link *link)
9853 {
9854         return link->pin_path;
9855 }
9856
9857 static int bpf_link__detach_fd(struct bpf_link *link)
9858 {
9859         return libbpf_err_errno(close(link->fd));
9860 }
9861
9862 struct bpf_link *bpf_link__open(const char *path)
9863 {
9864         struct bpf_link *link;
9865         int fd;
9866
9867         fd = bpf_obj_get(path);
9868         if (fd < 0) {
9869                 fd = -errno;
9870                 pr_warn("failed to open link at %s: %d\n", path, fd);
9871                 return libbpf_err_ptr(fd);
9872         }
9873
9874         link = calloc(1, sizeof(*link));
9875         if (!link) {
9876                 close(fd);
9877                 return libbpf_err_ptr(-ENOMEM);
9878         }
9879         link->detach = &bpf_link__detach_fd;
9880         link->fd = fd;
9881
9882         link->pin_path = strdup(path);
9883         if (!link->pin_path) {
9884                 bpf_link__destroy(link);
9885                 return libbpf_err_ptr(-ENOMEM);
9886         }
9887
9888         return link;
9889 }
9890
9891 int bpf_link__detach(struct bpf_link *link)
9892 {
9893         return bpf_link_detach(link->fd) ? -errno : 0;
9894 }
9895
9896 int bpf_link__pin(struct bpf_link *link, const char *path)
9897 {
9898         int err;
9899
9900         if (link->pin_path)
9901                 return libbpf_err(-EBUSY);
9902         err = make_parent_dir(path);
9903         if (err)
9904                 return libbpf_err(err);
9905         err = check_path(path);
9906         if (err)
9907                 return libbpf_err(err);
9908
9909         link->pin_path = strdup(path);
9910         if (!link->pin_path)
9911                 return libbpf_err(-ENOMEM);
9912
9913         if (bpf_obj_pin(link->fd, link->pin_path)) {
9914                 err = -errno;
9915                 zfree(&link->pin_path);
9916                 return libbpf_err(err);
9917         }
9918
9919         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
9920         return 0;
9921 }
9922
9923 int bpf_link__unpin(struct bpf_link *link)
9924 {
9925         int err;
9926
9927         if (!link->pin_path)
9928                 return libbpf_err(-EINVAL);
9929
9930         err = unlink(link->pin_path);
9931         if (err != 0)
9932                 return -errno;
9933
9934         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
9935         zfree(&link->pin_path);
9936         return 0;
9937 }
9938
9939 struct bpf_link_perf {
9940         struct bpf_link link;
9941         int perf_event_fd;
9942         /* legacy kprobe support: keep track of probe identifier and type */
9943         char *legacy_probe_name;
9944         bool legacy_is_kprobe;
9945         bool legacy_is_retprobe;
9946 };
9947
9948 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
9949 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
9950
9951 static int bpf_link_perf_detach(struct bpf_link *link)
9952 {
9953         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9954         int err = 0;
9955
9956         if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
9957                 err = -errno;
9958
9959         if (perf_link->perf_event_fd != link->fd)
9960                 close(perf_link->perf_event_fd);
9961         close(link->fd);
9962
9963         /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
9964         if (perf_link->legacy_probe_name) {
9965                 if (perf_link->legacy_is_kprobe) {
9966                         err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
9967                                                          perf_link->legacy_is_retprobe);
9968                 } else {
9969                         err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
9970                                                          perf_link->legacy_is_retprobe);
9971                 }
9972         }
9973
9974         return err;
9975 }
9976
9977 static void bpf_link_perf_dealloc(struct bpf_link *link)
9978 {
9979         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9980
9981         free(perf_link->legacy_probe_name);
9982         free(perf_link);
9983 }
9984
9985 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
9986                                                      const struct bpf_perf_event_opts *opts)
9987 {
9988         char errmsg[STRERR_BUFSIZE];
9989         struct bpf_link_perf *link;
9990         int prog_fd, link_fd = -1, err;
9991         bool force_ioctl_attach;
9992
9993         if (!OPTS_VALID(opts, bpf_perf_event_opts))
9994                 return libbpf_err_ptr(-EINVAL);
9995
9996         if (pfd < 0) {
9997                 pr_warn("prog '%s': invalid perf event FD %d\n",
9998                         prog->name, pfd);
9999                 return libbpf_err_ptr(-EINVAL);
10000         }
10001         prog_fd = bpf_program__fd(prog);
10002         if (prog_fd < 0) {
10003                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10004                         prog->name);
10005                 return libbpf_err_ptr(-EINVAL);
10006         }
10007
10008         link = calloc(1, sizeof(*link));
10009         if (!link)
10010                 return libbpf_err_ptr(-ENOMEM);
10011         link->link.detach = &bpf_link_perf_detach;
10012         link->link.dealloc = &bpf_link_perf_dealloc;
10013         link->perf_event_fd = pfd;
10014
10015         force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
10016         if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
10017                 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
10018                         .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
10019
10020                 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
10021                 if (link_fd < 0) {
10022                         err = -errno;
10023                         pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
10024                                 prog->name, pfd,
10025                                 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10026                         goto err_out;
10027                 }
10028                 link->link.fd = link_fd;
10029         } else {
10030                 if (OPTS_GET(opts, bpf_cookie, 0)) {
10031                         pr_warn("prog '%s': user context value is not supported\n", prog->name);
10032                         err = -EOPNOTSUPP;
10033                         goto err_out;
10034                 }
10035
10036                 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
10037                         err = -errno;
10038                         pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
10039                                 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10040                         if (err == -EPROTO)
10041                                 pr_warn("prog '%s': try adding PERF_SAMPLE_CALLCHAIN to, or removing exclude_callchain_[kernel|user] from, pfd %d\n",
10042                                         prog->name, pfd);
10043                         goto err_out;
10044                 }
10045                 link->link.fd = pfd;
10046         }
10047         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10048                 err = -errno;
10049                 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
10050                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10051                 goto err_out;
10052         }
10053
10054         return &link->link;
10055 err_out:
10056         if (link_fd >= 0)
10057                 close(link_fd);
10058         free(link);
10059         return libbpf_err_ptr(err);
10060 }
10061
10062 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
10063 {
10064         return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
10065 }
10066
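/* Usage sketch (illustrative): attaching to a caller-created perf event,
 * here an assumed 99 Hz software CPU-clock sampler on CPU 0 (arguments to
 * perf_event_open are pid == -1, cpu == 0, group_fd == -1):
 *
 *        struct perf_event_attr attr = {
 *                .size = sizeof(attr),
 *                .type = PERF_TYPE_SOFTWARE,
 *                .config = PERF_COUNT_SW_CPU_CLOCK,
 *                .freq = 1,
 *                .sample_freq = 99,
 *        };
 *        int pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1,
 *                          PERF_FLAG_FD_CLOEXEC);
 *        struct bpf_link *link = bpf_program__attach_perf_event(prog, pfd);
 */
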
10067 /*
10068  * this function is expected to parse an integer in the range of [0, 2^31-1]
10069  * from the given file using scanf format string fmt. If the actual parsed
10070  * value is negative, the result might be indistinguishable from an error.
10071  */
10072 static int parse_uint_from_file(const char *file, const char *fmt)
10073 {
10074         char buf[STRERR_BUFSIZE];
10075         int err, ret;
10076         FILE *f;
10077
10078         f = fopen(file, "re");
10079         if (!f) {
10080                 err = -errno;
10081                 pr_debug("failed to open '%s': %s\n", file,
10082                          libbpf_strerror_r(err, buf, sizeof(buf)));
10083                 return err;
10084         }
10085         err = fscanf(f, fmt, &ret);
10086         if (err != 1) {
10087                 err = err == EOF ? -EIO : -errno;
10088                 pr_debug("failed to parse '%s': %s\n", file,
10089                         libbpf_strerror_r(err, buf, sizeof(buf)));
10090                 fclose(f);
10091                 return err;
10092         }
10093         fclose(f);
10094         return ret;
10095 }
10096
10097 static int determine_kprobe_perf_type(void)
10098 {
10099         const char *file = "/sys/bus/event_source/devices/kprobe/type";
10100
10101         return parse_uint_from_file(file, "%d\n");
10102 }
10103
10104 static int determine_uprobe_perf_type(void)
10105 {
10106         const char *file = "/sys/bus/event_source/devices/uprobe/type";
10107
10108         return parse_uint_from_file(file, "%d\n");
10109 }
10110
10111 static int determine_kprobe_retprobe_bit(void)
10112 {
10113         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
10114
10115         return parse_uint_from_file(file, "config:%d\n");
10116 }
10117
10118 static int determine_uprobe_retprobe_bit(void)
10119 {
10120         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
10121
10122         return parse_uint_from_file(file, "config:%d\n");
10123 }
10124
10125 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
10126 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
10127
10128 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
10129                                  uint64_t offset, int pid, size_t ref_ctr_off)
10130 {
10131         const size_t attr_sz = sizeof(struct perf_event_attr);
10132         struct perf_event_attr attr;
10133         char errmsg[STRERR_BUFSIZE];
10134         int type, pfd;
10135
10136         if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
10137                 return -EINVAL;
10138
10139         memset(&attr, 0, attr_sz);
10140
10141         type = uprobe ? determine_uprobe_perf_type()
10142                       : determine_kprobe_perf_type();
10143         if (type < 0) {
10144                 pr_warn("failed to determine %s perf type: %s\n",
10145                         uprobe ? "uprobe" : "kprobe",
10146                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
10147                 return type;
10148         }
10149         if (retprobe) {
10150                 int bit = uprobe ? determine_uprobe_retprobe_bit()
10151                                  : determine_kprobe_retprobe_bit();
10152
10153                 if (bit < 0) {
10154                         pr_warn("failed to determine %s retprobe bit: %s\n",
10155                                 uprobe ? "uprobe" : "kprobe",
10156                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
10157                         return bit;
10158                 }
10159                 attr.config |= 1 << bit;
10160         }
10161         attr.size = attr_sz;
10162         attr.type = type;
10163         attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
10164         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
10165         attr.config2 = offset;           /* kprobe_addr or probe_offset */
10166
10167         /* pid filter is meaningful only for uprobes */
10168         pfd = syscall(__NR_perf_event_open, &attr,
10169                       pid < 0 ? -1 : pid /* pid */,
10170                       pid == -1 ? 0 : -1 /* cpu */,
10171                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10172         return pfd >= 0 ? pfd : -errno;
10173 }
10174
10175 static int append_to_file(const char *file, const char *fmt, ...)
10176 {
10177         int fd, n, err = 0;
10178         va_list ap;
10179         char buf[1024];
10180
10181         va_start(ap, fmt);
10182         n = vsnprintf(buf, sizeof(buf), fmt, ap);
10183         va_end(ap);
10184
10185         if (n < 0 || n >= sizeof(buf))
10186                 return -EINVAL;
10187
10188         fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
10189         if (fd < 0)
10190                 return -errno;
10191
10192         if (write(fd, buf, n) < 0)
10193                 err = -errno;
10194
10195         close(fd);
10196         return err;
10197 }
10198
10199 #define DEBUGFS "/sys/kernel/debug/tracing"
10200 #define TRACEFS "/sys/kernel/tracing"
10201
10202 static bool use_debugfs(void)
10203 {
10204         static int has_debugfs = -1;
10205
10206         if (has_debugfs < 0)
10207                 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
10208
10209         return has_debugfs == 1;
10210 }
10211
10212 static const char *tracefs_path(void)
10213 {
10214         return use_debugfs() ? DEBUGFS : TRACEFS;
10215 }
10216
10217 static const char *tracefs_kprobe_events(void)
10218 {
10219         return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
10220 }
10221
10222 static const char *tracefs_uprobe_events(void)
10223 {
10224         return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
10225 }
10226
10227 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
10228                                          const char *kfunc_name, size_t offset)
10229 {
10230         static int index = 0;
10231         int i;
10232
10233         snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
10234                  __sync_fetch_and_add(&index, 1));
10235
10236         /* sanitize kfunc_name in the probe name */
10237         for (i = 0; buf[i]; i++) {
10238                 if (!isalnum(buf[i]))
10239                         buf[i] = '_';
10240         }
10241 }
10242
10243 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
10244                                    const char *kfunc_name, size_t offset)
10245 {
10246         return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
10247                               retprobe ? 'r' : 'p',
10248                               retprobe ? "kretprobes" : "kprobes",
10249                               probe_name, kfunc_name, offset);
10250 }
10251
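/* For illustration, with a (hypothetical) generated probe_name of
 * "libbpf_1234_do_unlinkat_0x0_0" for kfunc "do_unlinkat" at offset 0, the
 * line appended to kprobe_events is:
 *
 *        p:kprobes/libbpf_1234_do_unlinkat_0x0_0 do_unlinkat+0x0
 *
 * with 'p' becoming 'r' and the group becoming "kretprobes" for retprobes.
 */
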
10252 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
10253 {
10254         return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
10255                               retprobe ? "kretprobes" : "kprobes", probe_name);
10256 }
10257
10258 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
10259 {
10260         char file[256];
10261
10262         snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10263                  tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
10264
10265         return parse_uint_from_file(file, "%d\n");
10266 }
10267
10268 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
10269                                          const char *kfunc_name, size_t offset, int pid)
10270 {
10271         const size_t attr_sz = sizeof(struct perf_event_attr);
10272         struct perf_event_attr attr;
10273         char errmsg[STRERR_BUFSIZE];
10274         int type, pfd, err;
10275
10276         err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
10277         if (err < 0) {
10278                 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
10279                         kfunc_name, offset,
10280                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10281                 return err;
10282         }
10283         type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
10284         if (type < 0) {
10285                 err = type;
10286                 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
10287                         kfunc_name, offset,
10288                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10289                 goto err_clean_legacy;
10290         }
10291
10292         memset(&attr, 0, attr_sz);
10293         attr.size = attr_sz;
10294         attr.config = type;
10295         attr.type = PERF_TYPE_TRACEPOINT;
10296
10297         pfd = syscall(__NR_perf_event_open, &attr,
10298                       pid < 0 ? -1 : pid, /* pid */
10299                       pid == -1 ? 0 : -1, /* cpu */
10300                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10301         if (pfd < 0) {
10302                 err = -errno;
10303                 pr_warn("legacy kprobe perf_event_open() failed: %s\n",
10304                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10305                 goto err_clean_legacy;
10306         }
10307         return pfd;
10308
10309 err_clean_legacy:
10310         /* Clear the newly added legacy kprobe_event */
10311         remove_kprobe_event_legacy(probe_name, retprobe);
10312         return err;
10313 }
10314
10315 static const char *arch_specific_syscall_pfx(void)
10316 {
10317 #if defined(__x86_64__)
10318         return "x64";
10319 #elif defined(__i386__)
10320         return "ia32";
10321 #elif defined(__s390x__)
10322         return "s390x";
10323 #elif defined(__s390__)
10324         return "s390";
10325 #elif defined(__arm__)
10326         return "arm";
10327 #elif defined(__aarch64__)
10328         return "arm64";
10329 #elif defined(__mips__)
10330         return "mips";
10331 #elif defined(__riscv)
10332         return "riscv";
10333 #elif defined(__powerpc64__)
10334         return "powerpc64";
10335 #elif defined(__powerpc__)
10336         return "powerpc";
10337 #else
10338         return NULL;
10339 #endif
10340 }
10341
10342 static int probe_kern_syscall_wrapper(void)
10343 {
10344         char syscall_name[64];
10345         const char *ksys_pfx;
10346
10347         ksys_pfx = arch_specific_syscall_pfx();
10348         if (!ksys_pfx)
10349                 return 0;
10350
10351         snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
10352
10353         if (determine_kprobe_perf_type() >= 0) {
10354                 int pfd;
10355
10356                 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
10357                 if (pfd >= 0)
10358                         close(pfd);
10359
10360                 return pfd >= 0 ? 1 : 0;
10361         } else { /* legacy mode */
10362                 char probe_name[128];
10363
10364                 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
10365                 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
10366                         return 0;
10367
10368                 (void)remove_kprobe_event_legacy(probe_name, false);
10369                 return 1;
10370         }
10371 }
10372
10373 struct bpf_link *
10374 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
10375                                 const char *func_name,
10376                                 const struct bpf_kprobe_opts *opts)
10377 {
10378         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10379         enum probe_attach_mode attach_mode;
10380         char errmsg[STRERR_BUFSIZE];
10381         char *legacy_probe = NULL;
10382         struct bpf_link *link;
10383         size_t offset;
10384         bool retprobe, legacy;
10385         int pfd, err;
10386
10387         if (!OPTS_VALID(opts, bpf_kprobe_opts))
10388                 return libbpf_err_ptr(-EINVAL);
10389
10390         attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
10391         retprobe = OPTS_GET(opts, retprobe, false);
10392         offset = OPTS_GET(opts, offset, 0);
10393         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10394
10395         legacy = determine_kprobe_perf_type() < 0;
10396         switch (attach_mode) {
10397         case PROBE_ATTACH_MODE_LEGACY:
10398                 legacy = true;
10399                 pe_opts.force_ioctl_attach = true;
10400                 break;
10401         case PROBE_ATTACH_MODE_PERF:
10402                 if (legacy)
10403                         return libbpf_err_ptr(-ENOTSUP);
10404                 pe_opts.force_ioctl_attach = true;
10405                 break;
10406         case PROBE_ATTACH_MODE_LINK:
10407                 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
10408                         return libbpf_err_ptr(-ENOTSUP);
10409                 break;
10410         case PROBE_ATTACH_MODE_DEFAULT:
10411                 break;
10412         default:
10413                 return libbpf_err_ptr(-EINVAL);
10414         }
10415
10416         if (!legacy) {
10417                 pfd = perf_event_open_probe(false /* uprobe */, retprobe,
10418                                             func_name, offset,
10419                                             -1 /* pid */, 0 /* ref_ctr_off */);
10420         } else {
10421                 char probe_name[256];
10422
10423                 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
10424                                              func_name, offset);
10425
10426                 legacy_probe = strdup(probe_name);
10427                 if (!legacy_probe)
10428                         return libbpf_err_ptr(-ENOMEM);
10429
10430                 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
10431                                                     offset, -1 /* pid */);
10432         }
10433         if (pfd < 0) {
10434                 err = -errno;
10435                 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
10436                         prog->name, retprobe ? "kretprobe" : "kprobe",
10437                         func_name, offset,
10438                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10439                 goto err_out;
10440         }
10441         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10442         err = libbpf_get_error(link);
10443         if (err) {
10444                 close(pfd);
10445                 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
10446                         prog->name, retprobe ? "kretprobe" : "kprobe",
10447                         func_name, offset,
10448                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10449                 goto err_clean_legacy;
10450         }
10451         if (legacy) {
10452                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10453
10454                 perf_link->legacy_probe_name = legacy_probe;
10455                 perf_link->legacy_is_kprobe = true;
10456                 perf_link->legacy_is_retprobe = retprobe;
10457         }
10458
10459         return link;
10460
10461 err_clean_legacy:
10462         if (legacy)
10463                 remove_kprobe_event_legacy(legacy_probe, retprobe);
10464 err_out:
10465         free(legacy_probe);
10466         return libbpf_err_ptr(err);
10467 }
10468
10469 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
10470                                             bool retprobe,
10471                                             const char *func_name)
10472 {
10473         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
10474                 .retprobe = retprobe,
10475         );
10476
10477         return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10478 }
10479
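/* Usage sketch (illustrative): attaching an already-loaded program as a
 * kprobe on an assumed kernel function "do_unlinkat":
 *
 *        struct bpf_link *link;
 *
 *        link = bpf_program__attach_kprobe(prog, false, "do_unlinkat");
 *        if (!link)
 *                fprintf(stderr, "attach failed: %d\n", -errno);
 *
 * Use bpf_program__attach_kprobe_opts() instead to specify an offset or a
 * BPF cookie.
 */
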
10480 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
10481                                               const char *syscall_name,
10482                                               const struct bpf_ksyscall_opts *opts)
10483 {
10484         LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
10485         char func_name[128];
10486
10487         if (!OPTS_VALID(opts, bpf_ksyscall_opts))
10488                 return libbpf_err_ptr(-EINVAL);
10489
10490         if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
10491                 /* arch_specific_syscall_pfx() should never return NULL here
10492                  * because it is guarded by kernel_supports(). However, the
10493                  * compiler does not know that, so we keep an explicit
10494                  * empty-string fallback as well.
10495                  */
10496                 snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
10497                          arch_specific_syscall_pfx() ? : "", syscall_name);
10498         } else {
10499                 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
10500         }
10501
10502         kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
10503         kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10504
10505         return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
10506 }
10507
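/* Usage sketch (illustrative): the same logic backs SEC() auto-attach; a
 * BPF-side program declared as, e.g.,
 *
 *        SEC("ksyscall/unlinkat")
 *        int BPF_KSYSCALL(probe_unlinkat, int dfd, const char *pathname)
 *
 * gets attached to __<arch>_sys_unlinkat on kernels with syscall wrappers,
 * or to __se_sys_unlinkat otherwise.
 */
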
10508 /* Adapted from perf/util/string.c */
10509 static bool glob_match(const char *str, const char *pat)
10510 {
10511         while (*str && *pat && *pat != '*') {
10512                 if (*pat == '?') {      /* Matches any single character */
10513                         str++;
10514                         pat++;
10515                         continue;
10516                 }
10517                 if (*str != *pat)
10518                         return false;
10519                 str++;
10520                 pat++;
10521         }
10522         /* Check wild card */
10523         if (*pat == '*') {
10524                 while (*pat == '*')
10525                         pat++;
10526                 if (!*pat) /* Tail wild card matches all */
10527                         return true;
10528                 while (*str)
10529                         if (glob_match(str++, pat))
10530                                 return true;
10531         }
10532         return !*str && !*pat;
10533 }
10534
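/* For illustration, this matcher supports only '*' and '?' (no character
 * classes or escapes), so:
 *
 *        glob_match("bpf_map_update_elem", "bpf_map_*")  -> true
 *        glob_match("bpf_map_update_elem", "bpf_?ap_*")  -> true
 *        glob_match("bpf_map_update_elem", "*_elem")     -> true
 *        glob_match("bpf_map_update_elem", "bpf_map")    -> false
 */
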
10535 struct kprobe_multi_resolve {
10536         const char *pattern;
10537         unsigned long *addrs;
10538         size_t cap;
10539         size_t cnt;
10540 };
10541
10542 static int
10543 resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
10544                         const char *sym_name, void *ctx)
10545 {
10546         struct kprobe_multi_resolve *res = ctx;
10547         int err;
10548
10549         if (!glob_match(sym_name, res->pattern))
10550                 return 0;
10551
10552         err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
10553                                 res->cnt + 1);
10554         if (err)
10555                 return err;
10556
10557         res->addrs[res->cnt++] = (unsigned long) sym_addr;
10558         return 0;
10559 }
10560
10561 struct bpf_link *
10562 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
10563                                       const char *pattern,
10564                                       const struct bpf_kprobe_multi_opts *opts)
10565 {
10566         LIBBPF_OPTS(bpf_link_create_opts, lopts);
10567         struct kprobe_multi_resolve res = {
10568                 .pattern = pattern,
10569         };
10570         struct bpf_link *link = NULL;
10571         char errmsg[STRERR_BUFSIZE];
10572         const unsigned long *addrs;
10573         int err, link_fd, prog_fd;
10574         const __u64 *cookies;
10575         const char **syms;
10576         bool retprobe;
10577         size_t cnt;
10578
10579         if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
10580                 return libbpf_err_ptr(-EINVAL);
10581
10582         syms    = OPTS_GET(opts, syms, false);
10583         addrs   = OPTS_GET(opts, addrs, false);
10584         cnt     = OPTS_GET(opts, cnt, false);
10585         cookies = OPTS_GET(opts, cookies, false);
10586
10587         if (!pattern && !addrs && !syms)
10588                 return libbpf_err_ptr(-EINVAL);
10589         if (pattern && (addrs || syms || cookies || cnt))
10590                 return libbpf_err_ptr(-EINVAL);
10591         if (!pattern && !cnt)
10592                 return libbpf_err_ptr(-EINVAL);
10593         if (addrs && syms)
10594                 return libbpf_err_ptr(-EINVAL);
10595
10596         if (pattern) {
10597                 err = libbpf_kallsyms_parse(resolve_kprobe_multi_cb, &res);
10598                 if (err)
10599                         goto error;
10600                 if (!res.cnt) {
10601                         err = -ENOENT;
10602                         goto error;
10603                 }
10604                 addrs = res.addrs;
10605                 cnt = res.cnt;
10606         }
10607
10608         retprobe = OPTS_GET(opts, retprobe, false);
10609
10610         lopts.kprobe_multi.syms = syms;
10611         lopts.kprobe_multi.addrs = addrs;
10612         lopts.kprobe_multi.cookies = cookies;
10613         lopts.kprobe_multi.cnt = cnt;
10614         lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
10615
10616         link = calloc(1, sizeof(*link));
10617         if (!link) {
10618                 err = -ENOMEM;
10619                 goto error;
10620         }
10621         link->detach = &bpf_link__detach_fd;
10622
10623         prog_fd = bpf_program__fd(prog);
10624         link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
10625         if (link_fd < 0) {
10626                 err = -errno;
10627                 pr_warn("prog '%s': failed to attach: %s\n",
10628                         prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10629                 goto error;
10630         }
10631         link->fd = link_fd;
10632         free(res.addrs);
10633         return link;
10634
10635 error:
10636         free(link);
10637         free(res.addrs);
10638         return libbpf_err_ptr(err);
10639 }
10640
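/* Usage sketch (illustrative): attaching one program to every kallsyms
 * symbol matching an assumed glob pattern:
 *
 *        LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .retprobe = false);
 *        struct bpf_link *link;
 *
 *        link = bpf_program__attach_kprobe_multi_opts(prog, "vfs_*", &opts);
 *
 * Alternatively, pass explicit .syms or .addrs arrays with .cnt (plus
 * optional per-symbol .cookies) instead of a pattern; the two modes are
 * mutually exclusive.
 */
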
10641 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10642 {
10643         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
10644         unsigned long offset = 0;
10645         const char *func_name;
10646         char *func;
10647         int n;
10648
10649         *link = NULL;
10650
10651         /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
10652         if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
10653                 return 0;
10654
10655         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
10656         if (opts.retprobe)
10657                 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
10658         else
10659                 func_name = prog->sec_name + sizeof("kprobe/") - 1;
10660
10661         n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
10662         if (n < 1) {
10663                 pr_warn("kprobe name is invalid: %s\n", func_name);
10664                 return -EINVAL;
10665         }
10666         if (opts.retprobe && offset != 0) {
10667                 free(func);
10668                 pr_warn("kretprobes do not support offset specification\n");
10669                 return -EINVAL;
10670         }
10671
10672         opts.offset = offset;
10673         *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
10674         free(func);
10675         return libbpf_get_error(*link);
10676 }
10677
10678 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10679 {
10680         LIBBPF_OPTS(bpf_ksyscall_opts, opts);
10681         const char *syscall_name;
10682
10683         *link = NULL;
10684
10685         /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
10686         if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
10687                 return 0;
10688
10689         opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
10690         if (opts.retprobe)
10691                 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
10692         else
10693                 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
10694
10695         *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
10696         return *link ? 0 : -errno;
10697 }
10698
10699 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10700 {
10701         LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
10702         const char *spec;
10703         char *pattern;
10704         int n;
10705
10706         *link = NULL;
10707
10708         /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
10709         if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
10710             strcmp(prog->sec_name, "kretprobe.multi") == 0)
10711                 return 0;
10712
10713         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
10714         if (opts.retprobe)
10715                 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
10716         else
10717                 spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
10718
10719         n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
10720         if (n < 1) {
10721                 pr_warn("kprobe multi pattern is invalid: %s\n", spec);
10722                 return -EINVAL;
10723         }
10724
10725         *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
10726         free(pattern);
10727         return libbpf_get_error(*link);
10728 }
10729
10730 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
10731                                          const char *binary_path, uint64_t offset)
10732 {
10733         int i;
10734
10735         snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
10736
10737         /* sanitize binary_path in the probe name */
10738         for (i = 0; buf[i]; i++) {
10739                 if (!isalnum(buf[i]))
10740                         buf[i] = '_';
10741         }
10742 }
10743
10744 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
10745                                           const char *binary_path, size_t offset)
10746 {
10747         return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
10748                               retprobe ? 'r' : 'p',
10749                               retprobe ? "uretprobes" : "uprobes",
10750                               probe_name, binary_path, offset);
10751 }
10752
10753 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
10754 {
10755         return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
10756                               retprobe ? "uretprobes" : "uprobes", probe_name);
10757 }
10758
10759 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
10760 {
10761         char file[512];
10762
10763         snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10764                  tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
10765
10766         return parse_uint_from_file(file, "%d\n");
10767 }
10768
10769 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
10770                                          const char *binary_path, size_t offset, int pid)
10771 {
10772         const size_t attr_sz = sizeof(struct perf_event_attr);
10773         struct perf_event_attr attr;
10774         int type, pfd, err;
10775
10776         err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
10777         if (err < 0) {
10778                 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
10779                         binary_path, (size_t)offset, err);
10780                 return err;
10781         }
10782         type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
10783         if (type < 0) {
10784                 err = type;
10785                 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
10786                         binary_path, offset, err);
10787                 goto err_clean_legacy;
10788         }
10789
10790         memset(&attr, 0, attr_sz);
10791         attr.size = attr_sz;
10792         attr.config = type;
10793         attr.type = PERF_TYPE_TRACEPOINT;
10794
10795         pfd = syscall(__NR_perf_event_open, &attr,
10796                       pid < 0 ? -1 : pid, /* pid */
10797                       pid == -1 ? 0 : -1, /* cpu */
10798                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10799         if (pfd < 0) {
10800                 err = -errno;
10801                 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
10802                 goto err_clean_legacy;
10803         }
10804         return pfd;
10805
10806 err_clean_legacy:
10807         /* Clear the newly added legacy uprobe_event */
10808         remove_uprobe_event_legacy(probe_name, retprobe);
10809         return err;
10810 }
10811
10812 /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
10813 static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
10814 {
10815         while ((scn = elf_nextscn(elf, scn)) != NULL) {
10816                 GElf_Shdr sh;
10817
10818                 if (!gelf_getshdr(scn, &sh))
10819                         continue;
10820                 if (sh.sh_type == sh_type)
10821                         return scn;
10822         }
10823         return NULL;
10824 }
10825
10826 /* Find offset of function name in the provided ELF object. "binary_path" is
10827  * the path to the ELF binary represented by "elf", and is only used for
10828  * error reporting. "name" matches symbol name or name@@LIB for library
10829  * functions.
10830  */
10831 static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
10832 {
10833         int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
10834         bool is_shared_lib, is_name_qualified;
10835         long ret = -ENOENT;
10836         size_t name_len;
10837         GElf_Ehdr ehdr;
10838
10839         if (!gelf_getehdr(elf, &ehdr)) {
10840                 pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
10841                 ret = -LIBBPF_ERRNO__FORMAT;
10842                 goto out;
10843         }
10844         /* for shared lib case, we do not need to calculate relative offset */
10845         is_shared_lib = ehdr.e_type == ET_DYN;
10846
10847         name_len = strlen(name);
10848         /* Does name specify "@@LIB"? */
10849         is_name_qualified = strstr(name, "@@") != NULL;
10850
10851         /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
10852          * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
10853          * linked binary may not have SHT_DYNSYM, so absence of a section should not be
10854          * reported as a warning/error.
10855          */
10856         for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
10857                 size_t nr_syms, strtabidx, idx;
10858                 Elf_Data *symbols = NULL;
10859                 Elf_Scn *scn = NULL;
10860                 int last_bind = -1;
10861                 const char *sname;
10862                 GElf_Shdr sh;
10863
10864                 scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
10865                 if (!scn) {
10866                         pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
10867                                  binary_path);
10868                         continue;
10869                 }
10870                 if (!gelf_getshdr(scn, &sh))
10871                         continue;
10872                 strtabidx = sh.sh_link;
10873                 symbols = elf_getdata(scn, 0);
10874                 if (!symbols) {
10875                         pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
10876                                 binary_path, elf_errmsg(-1));
10877                         ret = -LIBBPF_ERRNO__FORMAT;
10878                         goto out;
10879                 }
10880                 nr_syms = symbols->d_size / sh.sh_entsize;
10881
10882                 for (idx = 0; idx < nr_syms; idx++) {
10883                         int curr_bind;
10884                         GElf_Sym sym;
10885                         Elf_Scn *sym_scn;
10886                         GElf_Shdr sym_sh;
10887
10888                         if (!gelf_getsym(symbols, idx, &sym))
10889                                 continue;
10890
10891                         if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
10892                                 continue;
10893
10894                         sname = elf_strptr(elf, strtabidx, sym.st_name);
10895                         if (!sname)
10896                                 continue;
10897
10898                         curr_bind = GELF_ST_BIND(sym.st_info);
10899
10900                         /* User can specify func, func@@LIB or func@@LIB_VERSION. */
10901                         if (strncmp(sname, name, name_len) != 0)
10902                                 continue;
10903                         /* ...but we don't want a search for "foo" to match "foo2" also, so any
10904                          * additional characters in sname should be of the form "@@LIB".
10905                          */
10906                         if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
10907                                 continue;
10908
10909                         if (ret >= 0) {
10910                                 /* handle multiple matches */
10911                                 if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
10912                                         /* Only accept one non-weak bind. */
10913                                         pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
10914                                                 sname, name, binary_path);
10915                                         ret = -LIBBPF_ERRNO__FORMAT;
10916                                         goto out;
10917                                 } else if (curr_bind == STB_WEAK) {
10918                                         /* already have a non-weak bind, and
10919                                          * this is a weak bind, so ignore.
10920                                          */
10921                                         continue;
10922                                 }
10923                         }
10924
10925                         /* Transform symbol's virtual address (absolute for
10926                          * binaries and relative for shared libs) into file
10927                          * offset, which is what kernel is expecting for
10928                          * uprobe/uretprobe attachment.
10929                          * See Documentation/trace/uprobetracer.rst for more
10930                          * details.
10931                          * This is done by looking up symbol's containing
10932          * section's header and using its virtual address
10933                          * (sh_addr) and corresponding file offset (sh_offset)
10934                          * to transform sym.st_value (virtual address) into
10935                          * desired final file offset.
10936                          */
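                        /* Worked example (made-up numbers): for
                         * sym.st_value = 0x1040 in a section with
                         * sh_addr = 0x1000 and sh_offset = 0x200, the
                         * resulting file offset is
                         * 0x1040 - 0x1000 + 0x200 = 0x240.
                         */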
10937                         sym_scn = elf_getscn(elf, sym.st_shndx);
10938                         if (!sym_scn)
10939                                 continue;
10940                         if (!gelf_getshdr(sym_scn, &sym_sh))
10941                                 continue;
10942
10943                         ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
10944                         last_bind = curr_bind;
10945                 }
10946                 if (ret > 0)
10947                         break;
10948         }
10949
10950         if (ret > 0) {
10951                 pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
10952                          ret);
10953         } else {
10954                 if (ret == 0) {
10955                         pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
10956                                 is_shared_lib ? "should not be 0 in a shared library" :
10957                                                 "try using shared library path instead");
10958                         ret = -ENOENT;
10959                 } else {
10960                         pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
10961                 }
10962         }
10963 out:
10964         return ret;
10965 }
10966
10967 /* Find offset of function name in ELF object specified by path. "name" matches
10968  * symbol name or name@@LIB for library functions.
10969  */
10970 static long elf_find_func_offset_from_file(const char *binary_path, const char *name)
10971 {
10972         char errmsg[STRERR_BUFSIZE];
10973         long ret = -ENOENT;
10974         Elf *elf;
10975         int fd;
10976
10977         fd = open(binary_path, O_RDONLY | O_CLOEXEC);
10978         if (fd < 0) {
10979                 ret = -errno;
10980                 pr_warn("failed to open %s: %s\n", binary_path,
10981                         libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
10982                 return ret;
10983         }
10984         elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
10985         if (!elf) {
10986                 pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
10987                 close(fd);
10988                 return -LIBBPF_ERRNO__FORMAT;
10989         }
10990
10991         ret = elf_find_func_offset(elf, binary_path, name);
10992         elf_end(elf);
10993         close(fd);
10994         return ret;
10995 }
10996
10997 /* Find offset of function name in archive specified by path. Currently
10998  * supported are .zip files that do not compress their contents, as used on
10999  * Android in the form of APKs, for example. "file_name" is the name of the ELF
11000  * file inside the archive. "func_name" matches symbol name or name@@LIB for
11001  * library functions.
11002  *
11003  * An overview of the APK format is provided here:
11004  * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
11005  */
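/* For example (hypothetical path), a caller passing
 * "/data/app/com.example/base.apk!/lib/arm64-v8a/libfoo.so" to
 * bpf_program__attach_uprobe_opts() has it split on "!/" so that the .apk
 * becomes the archive path and the ELF member inside it arrives here as
 * "file_name".
 */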
11006 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
11007                                               const char *func_name)
11008 {
11009         struct zip_archive *archive;
11010         struct zip_entry entry;
11011         long ret;
11012         Elf *elf;
11013
11014         archive = zip_archive_open(archive_path);
11015         if (IS_ERR(archive)) {
11016                 ret = PTR_ERR(archive);
11017                 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
11018                 return ret;
11019         }
11020
11021         ret = zip_archive_find_entry(archive, file_name, &entry);
11022         if (ret) {
11023                 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
11024                         archive_path, ret);
11025                 goto out;
11026         }
11027         pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
11028                  (unsigned long)entry.data_offset);
11029
11030         if (entry.compression) {
11031                 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
11032                         archive_path);
11033                 ret = -LIBBPF_ERRNO__FORMAT;
11034                 goto out;
11035         }
11036
11037         elf = elf_memory((void *)entry.data, entry.data_length);
11038         if (!elf) {
11039                 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
11040                         elf_errmsg(-1));
11041                 ret = -LIBBPF_ERRNO__LIBELF;
11042                 goto out;
11043         }
11044
11045         ret = elf_find_func_offset(elf, file_name, func_name);
11046         if (ret > 0) {
11047                 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
11048                          func_name, file_name, archive_path, entry.data_offset, ret,
11049                          ret + entry.data_offset);
11050                 ret += entry.data_offset;
11051         }
11052         elf_end(elf);
11053
11054 out:
11055         zip_archive_close(archive);
11056         return ret;
11057 }
11058
11059 static const char *arch_specific_lib_paths(void)
11060 {
11061         /*
11062          * Based on https://packages.debian.org/sid/libc6.
11063          *
11064          * Assume that the traced program is built for the same architecture
11065          * as libbpf, which should cover the vast majority of cases.
11066          */
11067 #if defined(__x86_64__)
11068         return "/lib/x86_64-linux-gnu";
11069 #elif defined(__i386__)
11070         return "/lib/i386-linux-gnu";
11071 #elif defined(__s390x__)
11072         return "/lib/s390x-linux-gnu";
11073 #elif defined(__s390__)
11074         return "/lib/s390-linux-gnu";
11075 #elif defined(__arm__) && defined(__SOFTFP__)
11076         return "/lib/arm-linux-gnueabi";
11077 #elif defined(__arm__) && !defined(__SOFTFP__)
11078         return "/lib/arm-linux-gnueabihf";
11079 #elif defined(__aarch64__)
11080         return "/lib/aarch64-linux-gnu";
11081 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
11082         return "/lib/mips64el-linux-gnuabi64";
11083 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
11084         return "/lib/mipsel-linux-gnu";
11085 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
11086         return "/lib/powerpc64le-linux-gnu";
11087 #elif defined(__sparc__) && defined(__arch64__)
11088         return "/lib/sparc64-linux-gnu";
11089 #elif defined(__riscv) && __riscv_xlen == 64
11090         return "/lib/riscv64-linux-gnu";
11091 #else
11092         return NULL;
11093 #endif
11094 }
11095
11096 /* Get full path to program/shared library. */
11097 static int resolve_full_path(const char *file, char *result, size_t result_sz)
11098 {
11099         const char *search_paths[3] = {};
11100         int i, perm;
11101
11102         if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
11103                 search_paths[0] = getenv("LD_LIBRARY_PATH");
11104                 search_paths[1] = "/usr/lib64:/usr/lib";
11105                 search_paths[2] = arch_specific_lib_paths();
11106                 perm = R_OK;
11107         } else {
11108                 search_paths[0] = getenv("PATH");
11109                 search_paths[1] = "/usr/bin:/usr/sbin";
11110                 perm = R_OK | X_OK;
11111         }
11112
11113         for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
11114                 const char *s;
11115
11116                 if (!search_paths[i])
11117                         continue;
11118                 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
11119                         char *next_path;
11120                         int seg_len;
11121
11122                         if (s[0] == ':')
11123                                 s++;
11124                         next_path = strchr(s, ':');
11125                         seg_len = next_path ? next_path - s : strlen(s);
11126                         if (!seg_len)
11127                                 continue;
11128                         snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
11129                         /* ensure it has required permissions */
11130                         if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
11131                                 continue;
11132                         pr_debug("resolved '%s' to '%s'\n", file, result);
11133                         return 0;
11134                 }
11135         }
11136         return -ENOENT;
11137 }
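
/* For example (illustrative): on an x86-64 Debian-style system, a bare
 * "libc.so.6" typically resolves to "/lib/x86_64-linux-gnu/libc.so.6" via
 * the arch-specific fallback above, while a bare "bash" is searched for in
 * $PATH and then in "/usr/bin:/usr/sbin".
 */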
11138
11139 LIBBPF_API struct bpf_link *
11140 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
11141                                 const char *binary_path, size_t func_offset,
11142                                 const struct bpf_uprobe_opts *opts)
11143 {
11144         const char *archive_path = NULL, *archive_sep = NULL;
11145         char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
11146         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11147         enum probe_attach_mode attach_mode;
11148         char full_path[PATH_MAX];
11149         struct bpf_link *link;
11150         size_t ref_ctr_off;
11151         int pfd, err;
11152         bool retprobe, legacy;
11153         const char *func_name;
11154
11155         if (!OPTS_VALID(opts, bpf_uprobe_opts))
11156                 return libbpf_err_ptr(-EINVAL);
11157
11158         attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11159         retprobe = OPTS_GET(opts, retprobe, false);
11160         ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
11161         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11162
11163         if (!binary_path)
11164                 return libbpf_err_ptr(-EINVAL);
11165
11166         /* Check if "binary_path" refers to an archive. */
11167         archive_sep = strstr(binary_path, "!/");
11168         if (archive_sep) {
11169                 full_path[0] = '\0';
11170                 libbpf_strlcpy(full_path, binary_path,
11171                                min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
11172                 archive_path = full_path;
11173                 binary_path = archive_sep + 2;
11174         } else if (!strchr(binary_path, '/')) {
11175                 err = resolve_full_path(binary_path, full_path, sizeof(full_path));
11176                 if (err) {
11177                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11178                                 prog->name, binary_path, err);
11179                         return libbpf_err_ptr(err);
11180                 }
11181                 binary_path = full_path;
11182         }
11183         func_name = OPTS_GET(opts, func_name, NULL);
11184         if (func_name) {
11185                 long sym_off;
11186
11187                 if (archive_path) {
11188                         sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
11189                                                                     func_name);
11190                         binary_path = archive_path;
11191                 } else {
11192                         sym_off = elf_find_func_offset_from_file(binary_path, func_name);
11193                 }
11194                 if (sym_off < 0)
11195                         return libbpf_err_ptr(sym_off);
11196                 func_offset += sym_off;
11197         }
11198
11199         legacy = determine_uprobe_perf_type() < 0;
11200         switch (attach_mode) {
11201         case PROBE_ATTACH_MODE_LEGACY:
11202                 legacy = true;
11203                 pe_opts.force_ioctl_attach = true;
11204                 break;
11205         case PROBE_ATTACH_MODE_PERF:
11206                 if (legacy)
11207                         return libbpf_err_ptr(-ENOTSUP);
11208                 pe_opts.force_ioctl_attach = true;
11209                 break;
11210         case PROBE_ATTACH_MODE_LINK:
11211                 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11212                         return libbpf_err_ptr(-ENOTSUP);
11213                 break;
11214         case PROBE_ATTACH_MODE_DEFAULT:
11215                 break;
11216         default:
11217                 return libbpf_err_ptr(-EINVAL);
11218         }
11219
11220         if (!legacy) {
11221                 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
11222                                             func_offset, pid, ref_ctr_off);
11223         } else {
11224                 char probe_name[PATH_MAX + 64];
11225
11226                 if (ref_ctr_off)
11227                         return libbpf_err_ptr(-EINVAL);
11228
11229                 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
11230                                              binary_path, func_offset);
11231
11232                 legacy_probe = strdup(probe_name);
11233                 if (!legacy_probe)
11234                         return libbpf_err_ptr(-ENOMEM);
11235
11236                 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
11237                                                     binary_path, func_offset, pid);
11238         }
11239         if (pfd < 0) {
11240                 err = -errno;
11241                 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
11242                         prog->name, retprobe ? "uretprobe" : "uprobe",
11243                         binary_path, func_offset,
11244                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11245                 goto err_out;
11246         }
11247
11248         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11249         err = libbpf_get_error(link);
11250         if (err) {
11251                 close(pfd);
11252                 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
11253                         prog->name, retprobe ? "uretprobe" : "uprobe",
11254                         binary_path, func_offset,
11255                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11256                 goto err_clean_legacy;
11257         }
11258         if (legacy) {
11259                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11260
11261                 perf_link->legacy_probe_name = legacy_probe;
11262                 perf_link->legacy_is_kprobe = false;
11263                 perf_link->legacy_is_retprobe = retprobe;
11264         }
11265         return link;
11266
11267 err_clean_legacy:
11268         if (legacy)
11269                 remove_uprobe_event_legacy(legacy_probe, retprobe);
11270 err_out:
11271         free(legacy_probe);
11272         return libbpf_err_ptr(err);
11273 }
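
/* Usage sketch (illustrative; "prog" is an already-loaded program):
 *
 *	LIBBPF_OPTS(bpf_uprobe_opts, uopts, .func_name = "malloc");
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_uprobe_opts(prog, -1, "libc.so.6", 0, &uopts);
 *	if (!link)
 *		fprintf(stderr, "uprobe attach failed: %d\n", -errno);
 *
 * A pid of -1 traces all processes; a bare "libc.so.6" is resolved to a
 * full path by resolve_full_path() above.
 */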
11274
11275 /* Format of u[ret]probe section definition supporting auto-attach:
11276  * u[ret]probe/binary:function[+offset]
11277  *
11278  * binary can be an absolute/relative path or a filename; the latter is resolved to a
11279  * full binary path via bpf_program__attach_uprobe_opts.
11280  *
11281  * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
11282  * specified (and auto-attach is not possible) or the above format is specified for
11283  * auto-attach.
11284  */
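/* Examples of section names this format accepts (paths illustrative):
 *
 *	SEC("uprobe//usr/lib/libc.so.6:malloc")
 *	SEC("uprobe/libc.so.6:calloc+0x10")
 *	SEC("uretprobe//usr/bin/bash:readline")
 */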
11285 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11286 {
11287         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
11288         char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
11289         int n, ret = -EINVAL;
11290         long offset = 0;
11291
11292         *link = NULL;
11293
11294         n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
11295                    &probe_type, &binary_path, &func_name, &offset);
11296         switch (n) {
11297         case 1:
11298                 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
11299                 ret = 0;
11300                 break;
11301         case 2:
11302                 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
11303                         prog->name, prog->sec_name);
11304                 break;
11305         case 3:
11306         case 4:
11307                 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
11308                                 strcmp(probe_type, "uretprobe.s") == 0;
11309                 if (opts.retprobe && offset != 0) {
11310                         pr_warn("prog '%s': uretprobes do not support offset specification\n",
11311                                 prog->name);
11312                         break;
11313                 }
11314                 opts.func_name = func_name;
11315                 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
11316                 ret = libbpf_get_error(*link);
11317                 break;
11318         default:
11319                 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11320                         prog->sec_name);
11321                 break;
11322         }
11323         free(probe_type);
11324         free(binary_path);
11325         free(func_name);
11326
11327         return ret;
11328 }
11329
11330 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
11331                                             bool retprobe, pid_t pid,
11332                                             const char *binary_path,
11333                                             size_t func_offset)
11334 {
11335         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
11336
11337         return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
11338 }
11339
11340 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
11341                                           pid_t pid, const char *binary_path,
11342                                           const char *usdt_provider, const char *usdt_name,
11343                                           const struct bpf_usdt_opts *opts)
11344 {
11345         char resolved_path[512];
11346         struct bpf_object *obj = prog->obj;
11347         struct bpf_link *link;
11348         __u64 usdt_cookie;
11349         int err;
11350
11351         if (!OPTS_VALID(opts, bpf_usdt_opts))
11352                 return libbpf_err_ptr(-EINVAL);
11353
11354         if (bpf_program__fd(prog) < 0) {
11355                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
11356                         prog->name);
11357                 return libbpf_err_ptr(-EINVAL);
11358         }
11359
11360         if (!binary_path)
11361                 return libbpf_err_ptr(-EINVAL);
11362
11363         if (!strchr(binary_path, '/')) {
11364                 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
11365                 if (err) {
11366                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11367                                 prog->name, binary_path, err);
11368                         return libbpf_err_ptr(err);
11369                 }
11370                 binary_path = resolved_path;
11371         }
11372
11373         /* USDT manager is instantiated lazily on first USDT attach. It will
11374          * be destroyed together with BPF object in bpf_object__close().
11375          */
11376         if (IS_ERR(obj->usdt_man))
11377                 return libbpf_ptr(obj->usdt_man);
11378         if (!obj->usdt_man) {
11379                 obj->usdt_man = usdt_manager_new(obj);
11380                 if (IS_ERR(obj->usdt_man))
11381                         return libbpf_ptr(obj->usdt_man);
11382         }
11383
11384         usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
11385         link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
11386                                         usdt_provider, usdt_name, usdt_cookie);
11387         err = libbpf_get_error(link);
11388         if (err)
11389                 return libbpf_err_ptr(err);
11390         return link;
11391 }
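
/* Usage sketch (binary and provider/probe names are hypothetical):
 *
 *	link = bpf_program__attach_usdt(prog, -1, "/usr/sbin/mysqld",
 *					"mysql", "query__start", NULL);
 *
 * A pid of -1 targets any process; NULL opts leaves usdt_cookie at 0.
 */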
11392
11393 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11394 {
11395         char *path = NULL, *provider = NULL, *name = NULL;
11396         const char *sec_name;
11397         int n, err;
11398
11399         sec_name = bpf_program__section_name(prog);
11400         if (strcmp(sec_name, "usdt") == 0) {
11401                 /* no auto-attach for just SEC("usdt") */
11402                 *link = NULL;
11403                 return 0;
11404         }
11405
11406         n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
11407         if (n != 3) {
11408                 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
11409                         sec_name);
11410                 err = -EINVAL;
11411         } else {
11412                 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
11413                                                  provider, name, NULL);
11414                 err = libbpf_get_error(*link);
11415         }
11416         free(path);
11417         free(provider);
11418         free(name);
11419         return err;
11420 }
11421
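/* Resolve a tracepoint's perf event ID by reading it from tracefs; for
 * example (illustrative), the "sched/sched_switch" tracepoint's ID lives
 * in a file like <tracefs>/events/sched/sched_switch/id, with the mount
 * point coming from tracefs_path().
 */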
11422 static int determine_tracepoint_id(const char *tp_category,
11423                                    const char *tp_name)
11424 {
11425         char file[PATH_MAX];
11426         int ret;
11427
11428         ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11429                        tracefs_path(), tp_category, tp_name);
11430         if (ret < 0)
11431                 return -errno;
11432         if (ret >= sizeof(file)) {
11433                 pr_debug("tracepoint %s/%s path is too long\n",
11434                          tp_category, tp_name);
11435                 return -E2BIG;
11436         }
11437         return parse_uint_from_file(file, "%d\n");
11438 }
11439
11440 static int perf_event_open_tracepoint(const char *tp_category,
11441                                       const char *tp_name)
11442 {
11443         const size_t attr_sz = sizeof(struct perf_event_attr);
11444         struct perf_event_attr attr;
11445         char errmsg[STRERR_BUFSIZE];
11446         int tp_id, pfd, err;
11447
11448         tp_id = determine_tracepoint_id(tp_category, tp_name);
11449         if (tp_id < 0) {
11450                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
11451                         tp_category, tp_name,
11452                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
11453                 return tp_id;
11454         }
11455
11456         memset(&attr, 0, attr_sz);
11457         attr.type = PERF_TYPE_TRACEPOINT;
11458         attr.size = attr_sz;
11459         attr.config = tp_id;
11460
11461         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
11462                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11463         if (pfd < 0) {
11464                 err = -errno;
11465                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
11466                         tp_category, tp_name,
11467                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11468                 return err;
11469         }
11470         return pfd;
11471 }
11472
11473 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
11474                                                      const char *tp_category,
11475                                                      const char *tp_name,
11476                                                      const struct bpf_tracepoint_opts *opts)
11477 {
11478         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11479         char errmsg[STRERR_BUFSIZE];
11480         struct bpf_link *link;
11481         int pfd, err;
11482
11483         if (!OPTS_VALID(opts, bpf_tracepoint_opts))
11484                 return libbpf_err_ptr(-EINVAL);
11485
11486         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11487
11488         pfd = perf_event_open_tracepoint(tp_category, tp_name);
11489         if (pfd < 0) {
11490                 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
11491                         prog->name, tp_category, tp_name,
11492                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11493                 return libbpf_err_ptr(pfd);
11494         }
11495         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11496         err = libbpf_get_error(link);
11497         if (err) {
11498                 close(pfd);
11499                 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
11500                         prog->name, tp_category, tp_name,
11501                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11502                 return libbpf_err_ptr(err);
11503         }
11504         return link;
11505 }
11506
11507 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
11508                                                 const char *tp_category,
11509                                                 const char *tp_name)
11510 {
11511         return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
11512 }
11513
11514 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11515 {
11516         char *sec_name, *tp_cat, *tp_name;
11517
11518         *link = NULL;
11519
11520         /* no auto-attach for SEC("tp") or SEC("tracepoint") */
11521         if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
11522                 return 0;
11523
11524         sec_name = strdup(prog->sec_name);
11525         if (!sec_name)
11526                 return -ENOMEM;
11527
11528         /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
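        /* e.g. (illustrative) SEC("tp/sched/sched_switch") splits into
         * category "sched" and name "sched_switch"
         */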
11529         if (str_has_pfx(prog->sec_name, "tp/"))
11530                 tp_cat = sec_name + sizeof("tp/") - 1;
11531         else
11532                 tp_cat = sec_name + sizeof("tracepoint/") - 1;
11533         tp_name = strchr(tp_cat, '/');
11534         if (!tp_name) {
11535                 free(sec_name);
11536                 return -EINVAL;
11537         }
11538         *tp_name = '\0';
11539         tp_name++;
11540
11541         *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
11542         free(sec_name);
11543         return libbpf_get_error(*link);
11544 }
11545
11546 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
11547                                                     const char *tp_name)
11548 {
11549         char errmsg[STRERR_BUFSIZE];
11550         struct bpf_link *link;
11551         int prog_fd, pfd;
11552
11553         prog_fd = bpf_program__fd(prog);
11554         if (prog_fd < 0) {
11555                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11556                 return libbpf_err_ptr(-EINVAL);
11557         }
11558
11559         link = calloc(1, sizeof(*link));
11560         if (!link)
11561                 return libbpf_err_ptr(-ENOMEM);
11562         link->detach = &bpf_link__detach_fd;
11563
11564         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
11565         if (pfd < 0) {
11566                 pfd = -errno;
11567                 free(link);
11568                 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
11569                         prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11570                 return libbpf_err_ptr(pfd);
11571         }
11572         link->fd = pfd;
11573         return link;
11574 }
11575
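/* Auto-attach for SEC("raw_tp[.w]/<name>") and SEC("raw_tracepoint[.w]/<name>");
 * e.g. (illustrative) SEC("raw_tp/sched_switch") attaches to the
 * sched_switch raw tracepoint. Bare SEC("raw_tp") is valid but cannot
 * auto-attach.
 */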
11576 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11577 {
11578         static const char *const prefixes[] = {
11579                 "raw_tp",
11580                 "raw_tracepoint",
11581                 "raw_tp.w",
11582                 "raw_tracepoint.w",
11583         };
11584         size_t i;
11585         const char *tp_name = NULL;
11586
11587         *link = NULL;
11588
11589         for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
11590                 size_t pfx_len;
11591
11592                 if (!str_has_pfx(prog->sec_name, prefixes[i]))
11593                         continue;
11594
11595                 pfx_len = strlen(prefixes[i]);
11596                 /* no auto-attach for, e.g., bare SEC("raw_tp") */
11597                 if (prog->sec_name[pfx_len] == '\0')
11598                         return 0;
11599
11600                 if (prog->sec_name[pfx_len] != '/')
11601                         continue;
11602
11603                 tp_name = prog->sec_name + pfx_len + 1;
11604                 break;
11605         }
11606
11607         if (!tp_name) {
11608                 pr_warn("prog '%s': invalid section name '%s'\n",
11609                         prog->name, prog->sec_name);
11610                 return -EINVAL;
11611         }
11612
11613         *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
11614         return libbpf_get_error(*link);
11615 }
11616
11617 /* Common logic for all BPF program types that attach to a btf_id */
11618 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
11619                                                    const struct bpf_trace_opts *opts)
11620 {
11621         LIBBPF_OPTS(bpf_link_create_opts, link_opts);
11622         char errmsg[STRERR_BUFSIZE];
11623         struct bpf_link *link;
11624         int prog_fd, pfd;
11625
11626         if (!OPTS_VALID(opts, bpf_trace_opts))
11627                 return libbpf_err_ptr(-EINVAL);
11628
11629         prog_fd = bpf_program__fd(prog);
11630         if (prog_fd < 0) {
11631                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11632                 return libbpf_err_ptr(-EINVAL);
11633         }
11634
11635         link = calloc(1, sizeof(*link));
11636         if (!link)
11637                 return libbpf_err_ptr(-ENOMEM);
11638         link->detach = &bpf_link__detach_fd;
11639
11640         /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
11641         link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
11642         pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
11643         if (pfd < 0) {
11644                 pfd = -errno;
11645                 free(link);
11646                 pr_warn("prog '%s': failed to attach: %s\n",
11647                         prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11648                 return libbpf_err_ptr(pfd);
11649         }
11650         link->fd = pfd;
11651         return link;
11652 }
11653
11654 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
11655 {
11656         return bpf_program__attach_btf_id(prog, NULL);
11657 }
11658
11659 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
11660                                                 const struct bpf_trace_opts *opts)
11661 {
11662         return bpf_program__attach_btf_id(prog, opts);
11663 }
11664
11665 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
11666 {
11667         return bpf_program__attach_btf_id(prog, NULL);
11668 }
11669
11670 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11671 {
11672         *link = bpf_program__attach_trace(prog);
11673         return libbpf_get_error(*link);
11674 }
11675
11676 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11677 {
11678         *link = bpf_program__attach_lsm(prog);
11679         return libbpf_get_error(*link);
11680 }
11681
11682 static struct bpf_link *
11683 bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
11684                        const char *target_name)
11685 {
11686         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
11687                             .target_btf_id = btf_id);
11688         enum bpf_attach_type attach_type;
11689         char errmsg[STRERR_BUFSIZE];
11690         struct bpf_link *link;
11691         int prog_fd, link_fd;
11692
11693         prog_fd = bpf_program__fd(prog);
11694         if (prog_fd < 0) {
11695                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11696                 return libbpf_err_ptr(-EINVAL);
11697         }
11698
11699         link = calloc(1, sizeof(*link));
11700         if (!link)
11701                 return libbpf_err_ptr(-ENOMEM);
11702         link->detach = &bpf_link__detach_fd;
11703
11704         attach_type = bpf_program__expected_attach_type(prog);
11705         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
11706         if (link_fd < 0) {
11707                 link_fd = -errno;
11708                 free(link);
11709                 pr_warn("prog '%s': failed to attach to %s: %s\n",
11710                         prog->name, target_name,
11711                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11712                 return libbpf_err_ptr(link_fd);
11713         }
11714         link->fd = link_fd;
11715         return link;
11716 }
11717
11718 struct bpf_link *
11719 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
11720 {
11721         return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
11722 }
11723
11724 struct bpf_link *
11725 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
11726 {
11727         return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
11728 }
11729
11730 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
11731 {
11732         /* target_fd/target_ifindex use the same field in LINK_CREATE */
11733         return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
11734 }
11735
11736 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
11737                                               int target_fd,
11738                                               const char *attach_func_name)
11739 {
11740         int btf_id;
11741
11742         if (!!target_fd != !!attach_func_name) {
11743                 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
11744                         prog->name);
11745                 return libbpf_err_ptr(-EINVAL);
11746         }
11747
11748         if (prog->type != BPF_PROG_TYPE_EXT) {
11749                 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
11750                         prog->name);
11751                 return libbpf_err_ptr(-EINVAL);
11752         }
11753
11754         if (target_fd) {
11755                 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
11756                 if (btf_id < 0)
11757                         return libbpf_err_ptr(btf_id);
11758
11759                 return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
11760         } else {
11761                 /* no target, so use raw_tracepoint_open for compatibility
11762                  * with old kernels
11763                  */
11764                 return bpf_program__attach_trace(prog);
11765         }
11766 }
11767
11768 struct bpf_link *
11769 bpf_program__attach_iter(const struct bpf_program *prog,
11770                          const struct bpf_iter_attach_opts *opts)
11771 {
11772         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
11773         char errmsg[STRERR_BUFSIZE];
11774         struct bpf_link *link;
11775         int prog_fd, link_fd;
11776         __u32 target_fd = 0;
11777
11778         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
11779                 return libbpf_err_ptr(-EINVAL);
11780
11781         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
11782         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
11783
11784         prog_fd = bpf_program__fd(prog);
11785         if (prog_fd < 0) {
11786                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11787                 return libbpf_err_ptr(-EINVAL);
11788         }
11789
11790         link = calloc(1, sizeof(*link));
11791         if (!link)
11792                 return libbpf_err_ptr(-ENOMEM);
11793         link->detach = &bpf_link__detach_fd;
11794
11795         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
11796                                   &link_create_opts);
11797         if (link_fd < 0) {
11798                 link_fd = -errno;
11799                 free(link);
11800                 pr_warn("prog '%s': failed to attach to iterator: %s\n",
11801                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11802                 return libbpf_err_ptr(link_fd);
11803         }
11804         link->fd = link_fd;
11805         return link;
11806 }
11807
11808 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11809 {
11810         *link = bpf_program__attach_iter(prog, NULL);
11811         return libbpf_get_error(*link);
11812 }
11813
11814 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
11815 {
11816         struct bpf_link *link = NULL;
11817         int err;
11818
11819         if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
11820                 return libbpf_err_ptr(-EOPNOTSUPP);
11821
11822         err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
11823         if (err)
11824                 return libbpf_err_ptr(err);
11825
11826         /* When calling bpf_program__attach() explicitly, auto-attach support
11827          * is expected to work, so a NULL returned link is considered an error.
11828          * This is different for skeleton's attach, see comment in
11829          * bpf_object__attach_skeleton().
11830          */
11831         if (!link)
11832                 return libbpf_err_ptr(-EOPNOTSUPP);
11833
11834         return link;
11835 }
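
/* Usage sketch (illustrative; "skel" is a hypothetical skeleton pointer):
 *
 *	struct bpf_link *link = bpf_program__attach(skel->progs.handle_exec);
 *
 *	if (!link)
 *		fprintf(stderr, "attach failed: %d\n", -errno);
 *
 * In libbpf 1.0 mode a NULL return indicates failure, with errno set.
 */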
11836
11837 struct bpf_link_struct_ops {
11838         struct bpf_link link;
11839         int map_fd;
11840 };
11841
11842 static int bpf_link__detach_struct_ops(struct bpf_link *link)
11843 {
11844         struct bpf_link_struct_ops *st_link;
11845         __u32 zero = 0;
11846
11847         st_link = container_of(link, struct bpf_link_struct_ops, link);
11848
11849         if (st_link->map_fd < 0)
11850                 /* w/o a real link */
11851                 return bpf_map_delete_elem(link->fd, &zero);
11852
11853         return close(link->fd);
11854 }
11855
11856 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
11857 {
11858         struct bpf_link_struct_ops *link;
11859         __u32 zero = 0;
11860         int err, fd;
11861
11862         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
11863                 return libbpf_err_ptr(-EINVAL);
11864
11865         link = calloc(1, sizeof(*link));
11866         if (!link)
11867                 return libbpf_err_ptr(-ENOMEM);
11868
11869         /* kern_vdata should be prepared during the loading phase. */
11870         err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
11871         /* It can be EBUSY if the map has been used to create or
11872          * update a link before.  We don't allow updating the value of
11873          * a struct_ops once it is set.  That ensures that the value
11874          * never changes.  So, it is safe to skip EBUSY.
11875          */
11876         if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
11877                 free(link);
11878                 return libbpf_err_ptr(err);
11879         }
11880
11881         link->link.detach = bpf_link__detach_struct_ops;
11882
11883         if (!(map->def.map_flags & BPF_F_LINK)) {
11884                 /* w/o a real link */
11885                 link->link.fd = map->fd;
11886                 link->map_fd = -1;
11887                 return &link->link;
11888         }
11889
11890         fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
11891         if (fd < 0) {
11892                 free(link);
11893                 return libbpf_err_ptr(fd);
11894         }
11895
11896         link->link.fd = fd;
11897         link->map_fd = map->fd;
11898
11899         return &link->link;
11900 }
11901
11902 /*
11903  * Swap the backing struct_ops map of a link with a new struct_ops map.
11904  */
11905 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
11906 {
11907         struct bpf_link_struct_ops *st_ops_link;
11908         __u32 zero = 0;
11909         int err;
11910
11911         if (!bpf_map__is_struct_ops(map) || map->fd < 0)
11912                 return -EINVAL;
11913
11914         st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
11915         /* Ensure the type of a link is correct */
11916         if (st_ops_link->map_fd < 0)
11917                 return -EINVAL;
11918
11919         err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
11920         /* It can be EBUSY if the map has been used to create or
11921          * update a link before.  We don't allow updating the value of
11922          * a struct_ops once it is set.  That ensures that the value
11923          * never changes.  So, it is safe to skip EBUSY.
11924          */
11925         if (err && err != -EBUSY)
11926                 return err;
11927
11928         err = bpf_link_update(link->fd, map->fd, NULL);
11929         if (err < 0)
11930                 return err;
11931
11932         st_ops_link->map_fd = map->fd;
11933
11934         return 0;
11935 }
11936
11937 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
11938                                                           void *private_data);
11939
11940 static enum bpf_perf_event_ret
11941 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
11942                        void **copy_mem, size_t *copy_size,
11943                        bpf_perf_event_print_t fn, void *private_data)
11944 {
11945         struct perf_event_mmap_page *header = mmap_mem;
11946         __u64 data_head = ring_buffer_read_head(header);
11947         __u64 data_tail = header->data_tail;
11948         void *base = ((__u8 *)header) + page_size;
11949         int ret = LIBBPF_PERF_EVENT_CONT;
11950         struct perf_event_header *ehdr;
11951         size_t ehdr_size;
11952
11953         while (data_head != data_tail) {
11954                 ehdr = base + (data_tail & (mmap_size - 1));
11955                 ehdr_size = ehdr->size;
11956
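                /* The record wraps past the end of the mmap'ed ring
                 * buffer; stitch its two segments into a contiguous
                 * scratch buffer before handing it to the callback.
                 */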
11957                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
11958                         void *copy_start = ehdr;
11959                         size_t len_first = base + mmap_size - copy_start;
11960                         size_t len_secnd = ehdr_size - len_first;
11961
11962                         if (*copy_size < ehdr_size) {
11963                                 free(*copy_mem);
11964                                 *copy_mem = malloc(ehdr_size);
11965                                 if (!*copy_mem) {
11966                                         *copy_size = 0;
11967                                         ret = LIBBPF_PERF_EVENT_ERROR;
11968                                         break;
11969                                 }
11970                                 *copy_size = ehdr_size;
11971                         }
11972
11973                         memcpy(*copy_mem, copy_start, len_first);
11974                         memcpy(*copy_mem + len_first, base, len_secnd);
11975                         ehdr = *copy_mem;
11976                 }
11977
11978                 ret = fn(ehdr, private_data);
11979                 data_tail += ehdr_size;
11980                 if (ret != LIBBPF_PERF_EVENT_CONT)
11981                         break;
11982         }
11983
11984         ring_buffer_write_tail(header, data_tail);
11985         return libbpf_err(ret);
11986 }
11987
11988 struct perf_buffer;
11989
11990 struct perf_buffer_params {
11991         struct perf_event_attr *attr;
11992         /* if event_cb is specified, it takes precedence */
11993         perf_buffer_event_fn event_cb;
11994         /* sample_cb and lost_cb are higher-level common-case callbacks */
11995         perf_buffer_sample_fn sample_cb;
11996         perf_buffer_lost_fn lost_cb;
11997         void *ctx;
11998         int cpu_cnt;
11999         int *cpus;
12000         int *map_keys;
12001 };
12002
12003 struct perf_cpu_buf {
12004         struct perf_buffer *pb;
12005         void *base; /* mmap()'ed memory */
12006         void *buf; /* for reconstructing segmented data */
12007         size_t buf_size;
12008         int fd;
12009         int cpu;
12010         int map_key;
12011 };
12012
12013 struct perf_buffer {
12014         perf_buffer_event_fn event_cb;
12015         perf_buffer_sample_fn sample_cb;
12016         perf_buffer_lost_fn lost_cb;
12017         void *ctx; /* passed into callbacks */
12018
12019         size_t page_size;
12020         size_t mmap_size;
12021         struct perf_cpu_buf **cpu_bufs;
12022         struct epoll_event *events;
12023         int cpu_cnt; /* number of allocated CPU buffers */
12024         int epoll_fd; /* epoll FD used to poll per-CPU perf buffers */
12025         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
12026 };
12027
12028 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
12029                                       struct perf_cpu_buf *cpu_buf)
12030 {
12031         if (!cpu_buf)
12032                 return;
12033         if (cpu_buf->base &&
12034             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
12035                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
12036         if (cpu_buf->fd >= 0) {
12037                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
12038                 close(cpu_buf->fd);
12039         }
12040         free(cpu_buf->buf);
12041         free(cpu_buf);
12042 }
12043
12044 void perf_buffer__free(struct perf_buffer *pb)
12045 {
12046         int i;
12047
12048         if (IS_ERR_OR_NULL(pb))
12049                 return;
12050         if (pb->cpu_bufs) {
12051                 for (i = 0; i < pb->cpu_cnt; i++) {
12052                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
12053
12054                         if (!cpu_buf)
12055                                 continue;
12056
12057                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
12058                         perf_buffer__free_cpu_buf(pb, cpu_buf);
12059                 }
12060                 free(pb->cpu_bufs);
12061         }
12062         if (pb->epoll_fd >= 0)
12063                 close(pb->epoll_fd);
12064         free(pb->events);
12065         free(pb);
12066 }
12067
12068 static struct perf_cpu_buf *
12069 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
12070                           int cpu, int map_key)
12071 {
12072         struct perf_cpu_buf *cpu_buf;
12073         char msg[STRERR_BUFSIZE];
12074         int err;
12075
12076         cpu_buf = calloc(1, sizeof(*cpu_buf));
12077         if (!cpu_buf)
12078                 return ERR_PTR(-ENOMEM);
12079
12080         cpu_buf->pb = pb;
12081         cpu_buf->cpu = cpu;
12082         cpu_buf->map_key = map_key;
12083
12084         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
12085                               -1, PERF_FLAG_FD_CLOEXEC);
12086         if (cpu_buf->fd < 0) {
12087                 err = -errno;
12088                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
12089                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12090                 goto error;
12091         }
12092
12093         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
12094                              PROT_READ | PROT_WRITE, MAP_SHARED,
12095                              cpu_buf->fd, 0);
12096         if (cpu_buf->base == MAP_FAILED) {
12097                 cpu_buf->base = NULL;
12098                 err = -errno;
12099                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
12100                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12101                 goto error;
12102         }
12103
12104         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
12105                 err = -errno;
12106                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
12107                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12108                 goto error;
12109         }
12110
12111         return cpu_buf;
12112
12113 error:
12114         perf_buffer__free_cpu_buf(pb, cpu_buf);
12115         return (struct perf_cpu_buf *)ERR_PTR(err);
12116 }
12117
12118 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12119                                               struct perf_buffer_params *p);
12120
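/* Usage sketch (illustrative; handle_event/handle_lost are user-supplied
 * callbacks and "map" is a BPF_MAP_TYPE_PERF_EVENT_ARRAY map):
 *
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new(bpf_map__fd(map), 8, handle_event,
 *			      handle_lost, NULL, NULL);
 *	if (!pb)
 *		return -errno;
 *	while (perf_buffer__poll(pb, 100) >= 0)
 *		;
 *	perf_buffer__free(pb);
 *
 * The page count (8 here) must be a power of two, as enforced by
 * __perf_buffer__new().
 */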
12121 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
12122                                      perf_buffer_sample_fn sample_cb,
12123                                      perf_buffer_lost_fn lost_cb,
12124                                      void *ctx,
12125                                      const struct perf_buffer_opts *opts)
12126 {
12127         const size_t attr_sz = sizeof(struct perf_event_attr);
12128         struct perf_buffer_params p = {};
12129         struct perf_event_attr attr;
12130         __u32 sample_period;
12131
12132         if (!OPTS_VALID(opts, perf_buffer_opts))
12133                 return libbpf_err_ptr(-EINVAL);
12134
12135         sample_period = OPTS_GET(opts, sample_period, 1);
12136         if (!sample_period)
12137                 sample_period = 1;
12138
12139         memset(&attr, 0, attr_sz);
12140         attr.size = attr_sz;
12141         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
12142         attr.type = PERF_TYPE_SOFTWARE;
12143         attr.sample_type = PERF_SAMPLE_RAW;
12144         attr.sample_period = sample_period;
12145         attr.wakeup_events = sample_period;
12146
12147         p.attr = &attr;
12148         p.sample_cb = sample_cb;
12149         p.lost_cb = lost_cb;
12150         p.ctx = ctx;
12151
12152         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12153 }
12154
12155 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
12156                                          struct perf_event_attr *attr,
12157                                          perf_buffer_event_fn event_cb, void *ctx,
12158                                          const struct perf_buffer_raw_opts *opts)
12159 {
12160         struct perf_buffer_params p = {};
12161
12162         if (!attr)
12163                 return libbpf_err_ptr(-EINVAL);
12164
12165         if (!OPTS_VALID(opts, perf_buffer_raw_opts))
12166                 return libbpf_err_ptr(-EINVAL);
12167
12168         p.attr = attr;
12169         p.event_cb = event_cb;
12170         p.ctx = ctx;
12171         p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
12172         p.cpus = OPTS_GET(opts, cpus, NULL);
12173         p.map_keys = OPTS_GET(opts, map_keys, NULL);
12174
12175         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12176 }
12177
12178 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12179                                               struct perf_buffer_params *p)
12180 {
12181         const char *online_cpus_file = "/sys/devices/system/cpu/online";
12182         struct bpf_map_info map;
12183         char msg[STRERR_BUFSIZE];
12184         struct perf_buffer *pb;
12185         bool *online = NULL;
12186         __u32 map_info_len;
12187         int err, i, j, n;
12188
12189         if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
12190                 pr_warn("page count should be a power of two, but is %zu\n",
12191                         page_cnt);
12192                 return ERR_PTR(-EINVAL);
12193         }
12194
12195         /* best-effort sanity checks */
12196         memset(&map, 0, sizeof(map));
12197         map_info_len = sizeof(map);
12198         err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
12199         if (err) {
12200                 err = -errno;
12201                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
12202                  * -EBADFD, -EFAULT, or -E2BIG on real error
12203                  */
12204                 if (err != -EINVAL) {
12205                         pr_warn("failed to get map info for map FD %d: %s\n",
12206                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
12207                         return ERR_PTR(err);
12208                 }
12209                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
12210                          map_fd);
12211         } else {
12212                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
12213                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
12214                                 map.name);
12215                         return ERR_PTR(-EINVAL);
12216                 }
12217         }
12218
12219         pb = calloc(1, sizeof(*pb));
12220         if (!pb)
12221                 return ERR_PTR(-ENOMEM);
12222
12223         pb->event_cb = p->event_cb;
12224         pb->sample_cb = p->sample_cb;
12225         pb->lost_cb = p->lost_cb;
12226         pb->ctx = p->ctx;
12227
12228         pb->page_size = getpagesize();
12229         pb->mmap_size = pb->page_size * page_cnt;
12230         pb->map_fd = map_fd;
12231
12232         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
12233         if (pb->epoll_fd < 0) {
12234                 err = -errno;
12235                 pr_warn("failed to create epoll instance: %s\n",
12236                         libbpf_strerror_r(err, msg, sizeof(msg)));
12237                 goto error;
12238         }
12239
12240         if (p->cpu_cnt > 0) {
12241                 pb->cpu_cnt = p->cpu_cnt;
12242         } else {
12243                 pb->cpu_cnt = libbpf_num_possible_cpus();
12244                 if (pb->cpu_cnt < 0) {
12245                         err = pb->cpu_cnt;
12246                         goto error;
12247                 }
12248                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
12249                         pb->cpu_cnt = map.max_entries;
12250         }
12251
12252         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
12253         if (!pb->events) {
12254                 err = -ENOMEM;
12255                 pr_warn("failed to allocate events: out of memory\n");
12256                 goto error;
12257         }
12258         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
12259         if (!pb->cpu_bufs) {
12260                 err = -ENOMEM;
12261                 pr_warn("failed to allocate buffers: out of memory\n");
12262                 goto error;
12263         }
12264
12265         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
12266         if (err) {
12267                 pr_warn("failed to get online CPU mask: %d\n", err);
12268                 goto error;
12269         }
12270
12271         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
12272                 struct perf_cpu_buf *cpu_buf;
12273                 int cpu, map_key;
12274
12275                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
12276                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
12277
12278                 /* in case the user didn't explicitly request particular CPUs
12279                  * to be attached to, skip offline/not-present CPUs
12280                  */
12281                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
12282                         continue;
12283
12284                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
12285                 if (IS_ERR(cpu_buf)) {
12286                         err = PTR_ERR(cpu_buf);
12287                         goto error;
12288                 }
12289
12290                 pb->cpu_bufs[j] = cpu_buf;
12291
12292                 err = bpf_map_update_elem(pb->map_fd, &map_key,
12293                                           &cpu_buf->fd, 0);
12294                 if (err) {
12295                         err = -errno;
12296                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
12297                                 cpu, map_key, cpu_buf->fd,
12298                                 libbpf_strerror_r(err, msg, sizeof(msg)));
12299                         goto error;
12300                 }
12301
12302                 pb->events[j].events = EPOLLIN;
12303                 pb->events[j].data.ptr = cpu_buf;
12304                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
12305                               &pb->events[j]) < 0) {
12306                         err = -errno;
12307                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
12308                                 cpu, cpu_buf->fd,
12309                                 libbpf_strerror_r(err, msg, sizeof(msg)));
12310                         goto error;
12311                 }
12312                 j++;
12313         }
12314         pb->cpu_cnt = j;
12315         free(online);
12316
12317         return pb;
12318
12319 error:
12320         free(online);
12321         if (pb)
12322                 perf_buffer__free(pb);
12323         return ERR_PTR(err);
12324 }
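
/*
 * Sketch of the lower-level raw constructor (illustrative; 'map_fd', the CPU
 * list, and the map keys are assumptions). perf_buffer__new_raw() hands over
 * full control of perf_event_attr and of which CPUs feed which array slots:
 *
 *	static enum bpf_perf_event_ret
 *	on_event(void *ctx, int cpu, struct perf_event_header *event)
 *	{
 *		// the raw callback sees every record, incl. PERF_RECORD_LOST
 *		return LIBBPF_PERF_EVENT_CONT;
 *	}
 *
 *	int cpus[] = { 0, 2 };	// consume only these CPUs...
 *	int keys[] = { 0, 1 };	// ...wired to these PERF_EVENT_ARRAY slots
 *	LIBBPF_OPTS(perf_buffer_raw_opts, opts,
 *		.cpu_cnt = 2,
 *		.cpus = cpus,
 *		.map_keys = keys,
 *	);
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_BPF_OUTPUT,
 *		.sample_type = PERF_SAMPLE_RAW,
 *		.sample_period = 1,
 *		.wakeup_events = 1,
 *	};
 *	struct perf_buffer *pb;
 *
 *	pb = perf_buffer__new_raw(map_fd, 8, &attr, on_event, NULL, &opts);
 */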
12325
12326 struct perf_sample_raw {
12327         struct perf_event_header header;
12328         uint32_t size;
12329         char data[];
12330 };
12331
12332 struct perf_sample_lost {
12333         struct perf_event_header header;
12334         uint64_t id;
12335         uint64_t lost;
12336         uint64_t sample_id;
12337 };
12338
12339 static enum bpf_perf_event_ret
12340 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
12341 {
12342         struct perf_cpu_buf *cpu_buf = ctx;
12343         struct perf_buffer *pb = cpu_buf->pb;
12344         void *data = e;
12345
12346         /* user wants full control over parsing perf event */
12347         if (pb->event_cb)
12348                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
12349
12350         switch (e->type) {
12351         case PERF_RECORD_SAMPLE: {
12352                 struct perf_sample_raw *s = data;
12353
12354                 if (pb->sample_cb)
12355                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
12356                 break;
12357         }
12358         case PERF_RECORD_LOST: {
12359                 struct perf_sample_lost *s = data;
12360
12361                 if (pb->lost_cb)
12362                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
12363                 break;
12364         }
12365         default:
12366                 pr_warn("unknown perf sample type %d\n", e->type);
12367                 return LIBBPF_PERF_EVENT_ERROR;
12368         }
12369         return LIBBPF_PERF_EVENT_CONT;
12370 }
12371
12372 static int perf_buffer__process_records(struct perf_buffer *pb,
12373                                         struct perf_cpu_buf *cpu_buf)
12374 {
12375         enum bpf_perf_event_ret ret;
12376
12377         ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
12378                                      pb->page_size, &cpu_buf->buf,
12379                                      &cpu_buf->buf_size,
12380                                      perf_buffer__process_record, cpu_buf);
12381         if (ret != LIBBPF_PERF_EVENT_CONT)
12382                 return ret;
12383         return 0;
12384 }
12385
12386 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
12387 {
12388         return pb->epoll_fd;
12389 }
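
/*
 * Integration sketch (illustrative; 'app_epoll_fd' and 'pb' are assumptions):
 * the epoll FD above can be nested into an application's own event loop; on
 * readiness, a non-blocking perf_buffer__poll() drains the ready rings:
 *
 *	struct epoll_event ev = { .events = EPOLLIN, .data.ptr = pb };
 *
 *	epoll_ctl(app_epoll_fd, EPOLL_CTL_ADD, perf_buffer__epoll_fd(pb), &ev);
 *	// ... later, when app_epoll_fd reports the FD as readable:
 *	perf_buffer__poll(pb, 0);
 */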
12390
12391 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
12392 {
12393         int i, cnt, err;
12394
12395         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
12396         if (cnt < 0)
12397                 return -errno;
12398
12399         for (i = 0; i < cnt; i++) {
12400                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
12401
12402                 err = perf_buffer__process_records(pb, cpu_buf);
12403                 if (err) {
12404                         pr_warn("error while processing records: %d\n", err);
12405                         return libbpf_err(err);
12406                 }
12407         }
12408         return cnt;
12409 }
12410
12411 /* Return the number of PERF_EVENT_ARRAY map slots set up by this
12412  * perf_buffer manager.
12413  */
12414 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
12415 {
12416         return pb->cpu_cnt;
12417 }
12418
12419 /*
12420  * Return the perf_event FD of the ring buffer in slot *buf_idx* of the
12421  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using the
12422  * select()/poll()/epoll() Linux syscalls.
12423  */
12424 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
12425 {
12426         struct perf_cpu_buf *cpu_buf;
12427
12428         if (buf_idx >= pb->cpu_cnt)
12429                 return libbpf_err(-EINVAL);
12430
12431         cpu_buf = pb->cpu_bufs[buf_idx];
12432         if (!cpu_buf)
12433                 return libbpf_err(-ENOENT);
12434
12435         return cpu_buf->fd;
12436 }
12437
12438 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
12439 {
12440         struct perf_cpu_buf *cpu_buf;
12441
12442         if (buf_idx >= pb->cpu_cnt)
12443                 return libbpf_err(-EINVAL);
12444
12445         cpu_buf = pb->cpu_bufs[buf_idx];
12446         if (!cpu_buf)
12447                 return libbpf_err(-ENOENT);
12448
12449         *buf = cpu_buf->base;
12450         *buf_size = pb->mmap_size;
12451         return 0;
12452 }
12453
12454 /*
12455  * Consume data from the perf ring buffer corresponding to slot *buf_idx*
12456  * in the PERF_EVENT_ARRAY BPF map, without waiting/polling. If there is
12457  * no data to consume, do nothing and return success.
12458  * Returns:
12459  *   - 0 on success;
12460  *   - <0 on failure.
12461  */
12462 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
12463 {
12464         struct perf_cpu_buf *cpu_buf;
12465
12466         if (buf_idx >= pb->cpu_cnt)
12467                 return libbpf_err(-EINVAL);
12468
12469         cpu_buf = pb->cpu_bufs[buf_idx];
12470         if (!cpu_buf)
12471                 return libbpf_err(-ENOENT);
12472
12473         return perf_buffer__process_records(pb, cpu_buf);
12474 }
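
/*
 * Per-buffer sketch (illustrative; error handling elided): poll a single
 * ring's FD directly and drain just that ring, e.g. from a thread dedicated
 * to one CPU buffer:
 *
 *	size_t idx = 0;	// slot within the PERF_EVENT_ARRAY map
 *	struct pollfd pfd = {
 *		.fd = perf_buffer__buffer_fd(pb, idx),
 *		.events = POLLIN,
 *	};
 *
 *	if (poll(&pfd, 1, 100) > 0)
 *		perf_buffer__consume_buffer(pb, idx);
 */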
12475
12476 int perf_buffer__consume(struct perf_buffer *pb)
12477 {
12478         int i, err;
12479
12480         for (i = 0; i < pb->cpu_cnt; i++) {
12481                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
12482
12483                 if (!cpu_buf)
12484                         continue;
12485
12486                 err = perf_buffer__process_records(pb, cpu_buf);
12487                 if (err) {
12488                         pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
12489                         return libbpf_err(err);
12490                 }
12491         }
12492         return 0;
12493 }
12494
12495 int bpf_program__set_attach_target(struct bpf_program *prog,
12496                                    int attach_prog_fd,
12497                                    const char *attach_func_name)
12498 {
12499         int btf_obj_fd = 0, btf_id = 0, err;
12500
12501         if (!prog || attach_prog_fd < 0)
12502                 return libbpf_err(-EINVAL);
12503
12504         if (prog->obj->loaded)
12505                 return libbpf_err(-EINVAL);
12506
12507         if (attach_prog_fd && !attach_func_name) {
12508                 /* remember attach_prog_fd and let bpf_program__load() find
12509                  * BTF ID during the program load
12510                  */
12511                 prog->attach_prog_fd = attach_prog_fd;
12512                 return 0;
12513         }
12514
12515         if (attach_prog_fd) {
12516                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
12517                                                  attach_prog_fd);
12518                 if (btf_id < 0)
12519                         return libbpf_err(btf_id);
12520         } else {
12521                 if (!attach_func_name)
12522                         return libbpf_err(-EINVAL);
12523
12524                 /* load btf_vmlinux, if it hasn't been loaded yet */
12525                 err = bpf_object__load_vmlinux_btf(prog->obj, true);
12526                 if (err)
12527                         return libbpf_err(err);
12528                 err = find_kernel_btf_id(prog->obj, attach_func_name,
12529                                          prog->expected_attach_type,
12530                                          &btf_obj_fd, &btf_id);
12531                 if (err)
12532                         return libbpf_err(err);
12533         }
12534
12535         prog->attach_btf_id = btf_id;
12536         prog->attach_btf_obj_fd = btf_obj_fd;
12537         prog->attach_prog_fd = attach_prog_fd;
12538         return 0;
12539 }
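
/*
 * Usage sketch (illustrative; the program name, function name, and
 * 'target_prog_fd' are made up): retarget a not-yet-loaded program, e.g.
 * point an freplace program at a function of an already-loaded target:
 *
 *	struct bpf_program *prog;
 *	int err;
 *
 *	prog = bpf_object__find_program_by_name(obj, "replacement");
 *	if (!prog)
 *		return -ESRCH;
 *	err = bpf_program__set_attach_target(prog, target_prog_fd,
 *					     "to_be_replaced");
 *	if (!err)
 *		err = bpf_object__load(obj);
 */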
12540
12541 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
12542 {
12543         int err = 0, n, len, start, end = -1;
12544         bool *tmp;
12545
12546         *mask = NULL;
12547         *mask_sz = 0;
12548
12549         /* Each substring separated by ',' has the format \d+-\d+ or \d+ */
12550         while (*s) {
12551                 if (*s == ',' || *s == '\n') {
12552                         s++;
12553                         continue;
12554                 }
12555                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
12556                 if (n <= 0 || n > 2) {
12557                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
12558                         err = -EINVAL;
12559                         goto cleanup;
12560                 } else if (n == 1) {
12561                         end = start;
12562                 }
12563                 if (start < 0 || start > end) {
12564                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
12565                                 start, end, s);
12566                         err = -EINVAL;
12567                         goto cleanup;
12568                 }
12569                 tmp = realloc(*mask, end + 1);
12570                 if (!tmp) {
12571                         err = -ENOMEM;
12572                         goto cleanup;
12573                 }
12574                 *mask = tmp;
12575                 memset(tmp + *mask_sz, 0, start - *mask_sz);
12576                 memset(tmp + start, 1, end - start + 1);
12577                 *mask_sz = end + 1;
12578                 s += len;
12579         }
12580         if (!*mask_sz) {
12581                 pr_warn("Empty CPU range\n");
12582                 return -EINVAL;
12583         }
12584         return 0;
12585 cleanup:
12586         free(*mask);
12587         *mask = NULL;
12588         return err;
12589 }
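
/*
 * Worked example: parse_cpu_mask_str("0-2,5", &mask, &n) yields n == 6 and
 * mask == { true, true, true, false, false, true }; the mask is sized to the
 * highest CPU mentioned plus one, and any gaps read as false.
 */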
12590
12591 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
12592 {
12593         int fd, err = 0, len;
12594         char buf[128];
12595
12596         fd = open(fcpu, O_RDONLY | O_CLOEXEC);
12597         if (fd < 0) {
12598                 err = -errno;
12599                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
12600                 return err;
12601         }
12602         len = read(fd, buf, sizeof(buf));
12603         close(fd);
12604         if (len <= 0) {
12605                 err = len ? -errno : -EINVAL;
12606                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
12607                 return err;
12608         }
12609         if (len >= sizeof(buf)) {
12610                 pr_warn("CPU mask is too big in file %s\n", fcpu);
12611                 return -E2BIG;
12612         }
12613         buf[len] = '\0';
12614
12615         return parse_cpu_mask_str(buf, mask, mask_sz);
12616 }
12617
12618 int libbpf_num_possible_cpus(void)
12619 {
12620         static const char *fcpu = "/sys/devices/system/cpu/possible";
12621         static int cpus;
12622         int err, n, i, tmp_cpus;
12623         bool *mask;
12624
12625         tmp_cpus = READ_ONCE(cpus);
12626         if (tmp_cpus > 0)
12627                 return tmp_cpus;
12628
12629         err = parse_cpu_mask_file(fcpu, &mask, &n);
12630         if (err)
12631                 return libbpf_err(err);
12632
12633         tmp_cpus = 0;
12634         for (i = 0; i < n; i++) {
12635                 if (mask[i])
12636                         tmp_cpus++;
12637         }
12638         free(mask);
12639
12640         WRITE_ONCE(cpus, tmp_cpus);
12641         return tmp_cpus;
12642 }
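
/*
 * Typical use (illustrative; 'percpu_map_fd' and 'key' are assumptions):
 * size per-CPU map value buffers by the number of possible CPUs, since the
 * kernel returns one value per possible CPU on lookup:
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	__u64 *vals;
 *	int err;
 *
 *	if (ncpus < 0)
 *		return ncpus;
 *	vals = calloc(ncpus, sizeof(*vals));
 *	if (!vals)
 *		return -ENOMEM;
 *	err = bpf_map_lookup_elem(percpu_map_fd, &key, vals);
 */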
12643
12644 static int populate_skeleton_maps(const struct bpf_object *obj,
12645                                   struct bpf_map_skeleton *maps,
12646                                   size_t map_cnt)
12647 {
12648         int i;
12649
12650         for (i = 0; i < map_cnt; i++) {
12651                 struct bpf_map **map = maps[i].map;
12652                 const char *name = maps[i].name;
12653                 void **mmaped = maps[i].mmaped;
12654
12655                 *map = bpf_object__find_map_by_name(obj, name);
12656                 if (!*map) {
12657                         pr_warn("failed to find skeleton map '%s'\n", name);
12658                         return -ESRCH;
12659                 }
12660
12661                 /* externs shouldn't be set up ahead of time from user code */
12662                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
12663                         *mmaped = (*map)->mmaped;
12664         }
12665         return 0;
12666 }
12667
12668 static int populate_skeleton_progs(const struct bpf_object *obj,
12669                                    struct bpf_prog_skeleton *progs,
12670                                    size_t prog_cnt)
12671 {
12672         int i;
12673
12674         for (i = 0; i < prog_cnt; i++) {
12675                 struct bpf_program **prog = progs[i].prog;
12676                 const char *name = progs[i].name;
12677
12678                 *prog = bpf_object__find_program_by_name(obj, name);
12679                 if (!*prog) {
12680                         pr_warn("failed to find skeleton program '%s'\n", name);
12681                         return -ESRCH;
12682                 }
12683         }
12684         return 0;
12685 }
12686
12687 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
12688                               const struct bpf_object_open_opts *opts)
12689 {
12690         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
12691                 .object_name = s->name,
12692         );
12693         struct bpf_object *obj;
12694         int err;
12695
12696         /* Attempt to preserve opts->object_name, unless overridden by the
12697          * user explicitly. Overriding the object name for skeletons is
12698          * discouraged, as it breaks global data maps: they use the object
12699          * name as their own map name prefix. When the skeleton is
12700          * generated, bpftool assumes that this name will stay the same.
12701          */
12702         if (opts) {
12703                 memcpy(&skel_opts, opts, sizeof(*opts));
12704                 if (!opts->object_name)
12705                         skel_opts.object_name = s->name;
12706         }
12707
12708         obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
12709         err = libbpf_get_error(obj);
12710         if (err) {
12711                 pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
12712                         s->name, err);
12713                 return libbpf_err(err);
12714         }
12715
12716         *s->obj = obj;
12717         err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
12718         if (err) {
12719                 pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
12720                 return libbpf_err(err);
12721         }
12722
12723         err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
12724         if (err) {
12725                 pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
12726                 return libbpf_err(err);
12727         }
12728
12729         return 0;
12730 }
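
/*
 * Lifecycle sketch (illustrative; 'my_obj' stands in for a skeleton
 * generated by 'bpftool gen skeleton' and is not a real name). The
 * generated wrappers drive the skeleton functions in this file:
 *
 *	struct my_obj *skel;
 *
 *	skel = my_obj__open();		// -> bpf_object__open_skeleton()
 *	if (!skel)
 *		return -errno;
 *	if (my_obj__load(skel))		// -> bpf_object__load_skeleton()
 *		goto out;
 *	if (my_obj__attach(skel))	// -> bpf_object__attach_skeleton()
 *		goto out;
 *	// ... run ...
 * out:
 *	my_obj__destroy(skel);		// -> bpf_object__destroy_skeleton()
 */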
12731
12732 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
12733 {
12734         int err, len, var_idx, i;
12735         const char *var_name;
12736         const struct bpf_map *map;
12737         struct btf *btf;
12738         __u32 map_type_id;
12739         const struct btf_type *map_type, *var_type;
12740         const struct bpf_var_skeleton *var_skel;
12741         struct btf_var_secinfo *var;
12742
12743         if (!s->obj)
12744                 return libbpf_err(-EINVAL);
12745
12746         btf = bpf_object__btf(s->obj);
12747         if (!btf) {
12748                 pr_warn("subskeletons require BTF at runtime (object %s)\n",
12749                         bpf_object__name(s->obj));
12750                 return libbpf_err(-errno);
12751         }
12752
12753         err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
12754         if (err) {
12755                 pr_warn("failed to populate subskeleton maps: %d\n", err);
12756                 return libbpf_err(err);
12757         }
12758
12759         err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
12760         if (err) {
12761                 pr_warn("failed to populate subskeleton progs: %d\n", err);
12762                 return libbpf_err(err);
12763         }
12764
12765         for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
12766                 var_skel = &s->vars[var_idx];
12767                 map = *var_skel->map;
12768                 map_type_id = bpf_map__btf_value_type_id(map);
12769                 map_type = btf__type_by_id(btf, map_type_id);
12770
12771                 if (!btf_is_datasec(map_type)) {
12772                         pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
12773                                 bpf_map__name(map),
12774                                 __btf_kind_str(btf_kind(map_type)));
12775                         return libbpf_err(-EINVAL);
12776                 }
12777
12778                 len = btf_vlen(map_type);
12779                 var = btf_var_secinfos(map_type);
12780                 for (i = 0; i < len; i++, var++) {
12781                         var_type = btf__type_by_id(btf, var->type);
12782                         var_name = btf__name_by_offset(btf, var_type->name_off);
12783                         if (strcmp(var_name, var_skel->name) == 0) {
12784                                 *var_skel->addr = map->mmaped + var->offset;
12785                                 break;
12786                         }
12787                 }
12788         }
12789         return 0;
12790 }
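
/*
 * Note: after a successful open, each subskeleton variable pointer (e.g. a
 * hypothetical 'sub->data->shared_cnt' from 'bpftool gen subskeleton'
 * output) aliases the owning object's mmap-ed datasec at the variable's
 * recorded offset, so reads and writes go straight to the map memory.
 */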
12791
12792 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
12793 {
12794         if (!s)
12795                 return;
12796         free(s->maps);
12797         free(s->progs);
12798         free(s->vars);
12799         free(s);
12800 }
12801
12802 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
12803 {
12804         int i, err;
12805
12806         err = bpf_object__load(*s->obj);
12807         if (err) {
12808                 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
12809                 return libbpf_err(err);
12810         }
12811
12812         for (i = 0; i < s->map_cnt; i++) {
12813                 struct bpf_map *map = *s->maps[i].map;
12814                 size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
12815                 int prot, map_fd = bpf_map__fd(map);
12816                 void **mmaped = s->maps[i].mmaped;
12817
12818                 if (!mmaped)
12819                         continue;
12820
12821                 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
12822                         *mmaped = NULL;
12823                         continue;
12824                 }
12825
12826                 if (map->def.map_flags & BPF_F_RDONLY_PROG)
12827                         prot = PROT_READ;
12828                 else
12829                         prot = PROT_READ | PROT_WRITE;
12830
12831                 /* Remap the anonymous mmap()-ed "map initialization image"
12832                  * as BPF map-backed mmap()-ed memory, preserving the same
12833                  * memory address. This causes the kernel to change the
12834                  * process' page tables to point to a different piece of
12835                  * kernel memory, but from the user-space point of view the
12836                  * address (and its contents, identical at this point) stays
12837                  * the same. The mapping is released by bpf_object__close()
12838                  * as part of the normal clean-up procedure, so skeleton
12839                  * clean-up doesn't need to worry about it.
12840                  */
12841                 *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
12842                 if (*mmaped == MAP_FAILED) {
12843                         err = -errno;
12844                         *mmaped = NULL;
12845                         pr_warn("failed to re-mmap() map '%s': %d\n",
12846                                  bpf_map__name(map), err);
12847                         return libbpf_err(err);
12848                 }
12849         }
12850
12851         return 0;
12852 }
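
/*
 * Effect sketch (illustrative; the skeleton field is hypothetical): after a
 * successful bpf_object__load_skeleton(), skeleton pointers into .data/.bss
 * alias the kernel-backed map memory, so plain stores become immediately
 * visible to the BPF program:
 *
 *	skel->bss->enable = true;	// no bpf_map_update_elem() needed
 */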
12853
12854 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
12855 {
12856         int i, err;
12857
12858         for (i = 0; i < s->prog_cnt; i++) {
12859                 struct bpf_program *prog = *s->progs[i].prog;
12860                 struct bpf_link **link = s->progs[i].link;
12861
12862                 if (!prog->autoload || !prog->autoattach)
12863                         continue;
12864
12865                 /* auto-attaching not supported for this program */
12866                 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12867                         continue;
12868
12869                 /* if user already set the link manually, don't attempt auto-attach */
12870                 if (*link)
12871                         continue;
12872
12873                 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
12874                 if (err) {
12875                         pr_warn("prog '%s': failed to auto-attach: %d\n",
12876                                 bpf_program__name(prog), err);
12877                         return libbpf_err(err);
12878                 }
12879
12880                 /* For some SEC() definitions auto-attach is supported only
12881                  * in some cases (e.g., when the definition completely
12882                  * specifies the target information), but not in others.
12883                  * SEC("uprobe") is one such case: if the user specified the
12884                  * target binary and function name, the BPF program can be
12885                  * auto-attached; if not, that shouldn't cause the skeleton
12886                  * attach to fail, and the program should just be skipped.
12887                  * attach_fn signals such a case by returning 0 (no error)
12888                  * and setting the link to NULL.
12889                  */
12890         }
12891
12892         return 0;
12893 }
12894
12895 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
12896 {
12897         int i;
12898
12899         for (i = 0; i < s->prog_cnt; i++) {
12900                 struct bpf_link **link = s->progs[i].link;
12901
12902                 bpf_link__destroy(*link);
12903                 *link = NULL;
12904         }
12905 }
12906
12907 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
12908 {
12909         if (!s)
12910                 return;
12911
12912         if (s->progs)
12913                 bpf_object__detach_skeleton(s);
12914         if (s->obj)
12915                 bpf_object__close(*s->obj);
12916         free(s->maps);
12917         free(s->progs);
12918         free(s);
12919 }