// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "zip.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vfprintf() in __base_pr() uses a nonliteral format string. It may break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);

static const char * const attach_type_name[] = {
        [BPF_CGROUP_INET_INGRESS]       = "cgroup_inet_ingress",
        [BPF_CGROUP_INET_EGRESS]        = "cgroup_inet_egress",
        [BPF_CGROUP_INET_SOCK_CREATE]   = "cgroup_inet_sock_create",
        [BPF_CGROUP_INET_SOCK_RELEASE]  = "cgroup_inet_sock_release",
        [BPF_CGROUP_SOCK_OPS]           = "cgroup_sock_ops",
        [BPF_CGROUP_DEVICE]             = "cgroup_device",
        [BPF_CGROUP_INET4_BIND]         = "cgroup_inet4_bind",
        [BPF_CGROUP_INET6_BIND]         = "cgroup_inet6_bind",
        [BPF_CGROUP_INET4_CONNECT]      = "cgroup_inet4_connect",
        [BPF_CGROUP_INET6_CONNECT]      = "cgroup_inet6_connect",
        [BPF_CGROUP_INET4_POST_BIND]    = "cgroup_inet4_post_bind",
        [BPF_CGROUP_INET6_POST_BIND]    = "cgroup_inet6_post_bind",
        [BPF_CGROUP_INET4_GETPEERNAME]  = "cgroup_inet4_getpeername",
        [BPF_CGROUP_INET6_GETPEERNAME]  = "cgroup_inet6_getpeername",
        [BPF_CGROUP_INET4_GETSOCKNAME]  = "cgroup_inet4_getsockname",
        [BPF_CGROUP_INET6_GETSOCKNAME]  = "cgroup_inet6_getsockname",
        [BPF_CGROUP_UDP4_SENDMSG]       = "cgroup_udp4_sendmsg",
        [BPF_CGROUP_UDP6_SENDMSG]       = "cgroup_udp6_sendmsg",
        [BPF_CGROUP_SYSCTL]             = "cgroup_sysctl",
        [BPF_CGROUP_UDP4_RECVMSG]       = "cgroup_udp4_recvmsg",
        [BPF_CGROUP_UDP6_RECVMSG]       = "cgroup_udp6_recvmsg",
        [BPF_CGROUP_GETSOCKOPT]         = "cgroup_getsockopt",
        [BPF_CGROUP_SETSOCKOPT]         = "cgroup_setsockopt",
        [BPF_SK_SKB_STREAM_PARSER]      = "sk_skb_stream_parser",
        [BPF_SK_SKB_STREAM_VERDICT]     = "sk_skb_stream_verdict",
        [BPF_SK_SKB_VERDICT]            = "sk_skb_verdict",
        [BPF_SK_MSG_VERDICT]            = "sk_msg_verdict",
        [BPF_LIRC_MODE2]                = "lirc_mode2",
        [BPF_FLOW_DISSECTOR]            = "flow_dissector",
        [BPF_TRACE_RAW_TP]              = "trace_raw_tp",
        [BPF_TRACE_FENTRY]              = "trace_fentry",
        [BPF_TRACE_FEXIT]               = "trace_fexit",
        [BPF_MODIFY_RETURN]             = "modify_return",
        [BPF_LSM_MAC]                   = "lsm_mac",
        [BPF_LSM_CGROUP]                = "lsm_cgroup",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
        [BPF_TRACE_ITER]                = "trace_iter",
        [BPF_XDP_DEVMAP]                = "xdp_devmap",
        [BPF_XDP_CPUMAP]                = "xdp_cpumap",
        [BPF_XDP]                       = "xdp",
        [BPF_SK_REUSEPORT_SELECT]       = "sk_reuseport_select",
        [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_reuseport_select_or_migrate",
        [BPF_PERF_EVENT]                = "perf_event",
        [BPF_TRACE_KPROBE_MULTI]        = "trace_kprobe_multi",
        [BPF_STRUCT_OPS]                = "struct_ops",
};

static const char * const link_type_name[] = {
        [BPF_LINK_TYPE_UNSPEC]                  = "unspec",
        [BPF_LINK_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_LINK_TYPE_TRACING]                 = "tracing",
        [BPF_LINK_TYPE_CGROUP]                  = "cgroup",
        [BPF_LINK_TYPE_ITER]                    = "iter",
        [BPF_LINK_TYPE_NETNS]                   = "netns",
        [BPF_LINK_TYPE_XDP]                     = "xdp",
        [BPF_LINK_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_LINK_TYPE_KPROBE_MULTI]            = "kprobe_multi",
        [BPF_LINK_TYPE_STRUCT_OPS]              = "struct_ops",
        [BPF_LINK_TYPE_NETFILTER]               = "netfilter",
};

static const char * const map_type_name[] = {
        [BPF_MAP_TYPE_UNSPEC]                   = "unspec",
        [BPF_MAP_TYPE_HASH]                     = "hash",
        [BPF_MAP_TYPE_ARRAY]                    = "array",
        [BPF_MAP_TYPE_PROG_ARRAY]               = "prog_array",
        [BPF_MAP_TYPE_PERF_EVENT_ARRAY]         = "perf_event_array",
        [BPF_MAP_TYPE_PERCPU_HASH]              = "percpu_hash",
        [BPF_MAP_TYPE_PERCPU_ARRAY]             = "percpu_array",
        [BPF_MAP_TYPE_STACK_TRACE]              = "stack_trace",
        [BPF_MAP_TYPE_CGROUP_ARRAY]             = "cgroup_array",
        [BPF_MAP_TYPE_LRU_HASH]                 = "lru_hash",
        [BPF_MAP_TYPE_LRU_PERCPU_HASH]          = "lru_percpu_hash",
        [BPF_MAP_TYPE_LPM_TRIE]                 = "lpm_trie",
        [BPF_MAP_TYPE_ARRAY_OF_MAPS]            = "array_of_maps",
        [BPF_MAP_TYPE_HASH_OF_MAPS]             = "hash_of_maps",
        [BPF_MAP_TYPE_DEVMAP]                   = "devmap",
        [BPF_MAP_TYPE_DEVMAP_HASH]              = "devmap_hash",
        [BPF_MAP_TYPE_SOCKMAP]                  = "sockmap",
        [BPF_MAP_TYPE_CPUMAP]                   = "cpumap",
        [BPF_MAP_TYPE_XSKMAP]                   = "xskmap",
        [BPF_MAP_TYPE_SOCKHASH]                 = "sockhash",
        [BPF_MAP_TYPE_CGROUP_STORAGE]           = "cgroup_storage",
        [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY]      = "reuseport_sockarray",
        [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]    = "percpu_cgroup_storage",
        [BPF_MAP_TYPE_QUEUE]                    = "queue",
        [BPF_MAP_TYPE_STACK]                    = "stack",
        [BPF_MAP_TYPE_SK_STORAGE]               = "sk_storage",
        [BPF_MAP_TYPE_STRUCT_OPS]               = "struct_ops",
        [BPF_MAP_TYPE_RINGBUF]                  = "ringbuf",
        [BPF_MAP_TYPE_INODE_STORAGE]            = "inode_storage",
        [BPF_MAP_TYPE_TASK_STORAGE]             = "task_storage",
        [BPF_MAP_TYPE_BLOOM_FILTER]             = "bloom_filter",
        [BPF_MAP_TYPE_USER_RINGBUF]             = "user_ringbuf",
        [BPF_MAP_TYPE_CGRP_STORAGE]             = "cgrp_storage",
};

static const char * const prog_type_name[] = {
        [BPF_PROG_TYPE_UNSPEC]                  = "unspec",
        [BPF_PROG_TYPE_SOCKET_FILTER]           = "socket_filter",
        [BPF_PROG_TYPE_KPROBE]                  = "kprobe",
        [BPF_PROG_TYPE_SCHED_CLS]               = "sched_cls",
        [BPF_PROG_TYPE_SCHED_ACT]               = "sched_act",
        [BPF_PROG_TYPE_TRACEPOINT]              = "tracepoint",
        [BPF_PROG_TYPE_XDP]                     = "xdp",
        [BPF_PROG_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_PROG_TYPE_CGROUP_SKB]              = "cgroup_skb",
        [BPF_PROG_TYPE_CGROUP_SOCK]             = "cgroup_sock",
        [BPF_PROG_TYPE_LWT_IN]                  = "lwt_in",
        [BPF_PROG_TYPE_LWT_OUT]                 = "lwt_out",
        [BPF_PROG_TYPE_LWT_XMIT]                = "lwt_xmit",
        [BPF_PROG_TYPE_SOCK_OPS]                = "sock_ops",
        [BPF_PROG_TYPE_SK_SKB]                  = "sk_skb",
        [BPF_PROG_TYPE_CGROUP_DEVICE]           = "cgroup_device",
        [BPF_PROG_TYPE_SK_MSG]                  = "sk_msg",
        [BPF_PROG_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_PROG_TYPE_CGROUP_SOCK_ADDR]        = "cgroup_sock_addr",
        [BPF_PROG_TYPE_LWT_SEG6LOCAL]           = "lwt_seg6local",
        [BPF_PROG_TYPE_LIRC_MODE2]              = "lirc_mode2",
        [BPF_PROG_TYPE_SK_REUSEPORT]            = "sk_reuseport",
        [BPF_PROG_TYPE_FLOW_DISSECTOR]          = "flow_dissector",
        [BPF_PROG_TYPE_CGROUP_SYSCTL]           = "cgroup_sysctl",
        [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
        [BPF_PROG_TYPE_CGROUP_SOCKOPT]          = "cgroup_sockopt",
        [BPF_PROG_TYPE_TRACING]                 = "tracing",
        [BPF_PROG_TYPE_STRUCT_OPS]              = "struct_ops",
        [BPF_PROG_TYPE_EXT]                     = "ext",
        [BPF_PROG_TYPE_LSM]                     = "lsm",
        [BPF_PROG_TYPE_SK_LOOKUP]               = "sk_lookup",
        [BPF_PROG_TYPE_SYSCALL]                 = "syscall",
        [BPF_PROG_TYPE_NETFILTER]               = "netfilter",
};

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn;

        old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);

        return old_print_fn;
}

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;
        int old_errno;
        libbpf_print_fn_t print_fn;

        print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
        if (!print_fn)
                return;

        old_errno = errno;

        va_start(args, format);
        print_fn(level, format, args);
        va_end(args);

        errno = old_errno;
}
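
/* Illustrative (user-side code, not part of libbpf itself; 'my_print' is
 * a hypothetical name): installing a custom print callback.
 *
 *	static int my_print(enum libbpf_print_level level,
 *			    const char *format, va_list args)
 *	{
 *		if (level == LIBBPF_DEBUG)
 *			return 0;
 *		return vfprintf(stderr, format, args);
 *	}
 *
 *	libbpf_set_print(my_print);	// returns the previous callback
 *	libbpf_set_print(NULL);		// silences libbpf output entirely
 */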

static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}
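
/* The formatting above yields, e.g., "512 bytes" for rlim_cur == 512,
 * "64.0 KiB" for the common 65536-byte default, and "8.0 MiB" for
 * rlim_cur == 8388608.
 */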

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
        /* as of v1.0 libbpf_set_strict_mode() is a no-op */
        return 0;
}

__u32 libbpf_major_version(void)
{
        return LIBBPF_MAJOR_VERSION;
}

__u32 libbpf_minor_version(void)
{
        return LIBBPF_MINOR_VERSION;
}

const char *libbpf_version_string(void)
{
#define __S(X) #X
#define _S(X) __S(X)
        return  "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
#undef _S
#undef __S
}
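
/* The two-level _S()/__S() indirection above is the usual preprocessor
 * trick for stringifying a macro's expansion rather than its name:
 *
 *	__S(LIBBPF_MAJOR_VERSION)  ->  "LIBBPF_MAJOR_VERSION"
 *	_S(LIBBPF_MAJOR_VERSION)   ->  __S(1)  ->  "1"  (if major == 1)
 *
 * so libbpf_version_string() returns, e.g., "v1.2".
 */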

enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN_LD64,
        RELO_EXTERN_CALL,
        RELO_SUBPROG_ADDR,
        RELO_CORE,
};
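
/* Roughly: RELO_LD64 is a ldimm64 insn referencing a BPF map, RELO_CALL a
 * call to another BPF subprogram, RELO_DATA a ldimm64 referencing global
 * data (.data/.rodata/.bss), RELO_EXTERN_LD64 a ldimm64 referencing an
 * extern ksym variable, RELO_EXTERN_CALL a call to an extern kfunc,
 * RELO_SUBPROG_ADDR a ldimm64 taking a subprogram's address, and
 * RELO_CORE a CO-RE relocation recorded in .BTF.ext.
 */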

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        union {
                const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
                struct {
                        int map_idx;
                        int sym_off;
                        int ext_idx;
                };
        };
};

/* stored as sec_def->cookie for all libbpf-supported SEC()s */
enum sec_def_flags {
        SEC_NONE = 0,
        /* expected_attach_type is optional and can be omitted if the kernel
         * doesn't support it
         */
        SEC_EXP_ATTACH_OPT = 1,
        /* legacy, only used by libbpf_get_type_names() and
         * libbpf_attach_type_by_name(), not used by libbpf itself at all.
         * This used to be associated with cgroup (and a few other) BPF
         * programs that were attachable through the BPF_PROG_ATTACH command.
         * Pretty meaningless nowadays, though.
         */
        SEC_ATTACHABLE = 2,
        SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
        /* attachment target is specified through BTF ID in either kernel or
         * other BPF program's BTF object
         */
        SEC_ATTACH_BTF = 4,
        /* BPF program type allows sleeping/blocking in kernel */
        SEC_SLEEPABLE = 8,
        /* BPF program supports non-linear XDP buffers */
        SEC_XDP_FRAGS = 16,
};
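
/* E.g., a sleepable LSM program declared with SEC("lsm.s/...") both
 * attaches by BTF ID and may sleep, so its sec_def carries the cookie
 * SEC_ATTACH_BTF | SEC_SLEEPABLE (4 | 8 == 12).
 */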

struct bpf_sec_def {
        char *sec;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        long cookie;
        int handler_id;

        libbpf_prog_setup_fn_t prog_setup_fn;
        libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
        libbpf_prog_attach_fn_t prog_attach_fn;
};

/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 */
struct bpf_program {
        char *name;
        char *sec_name;
        size_t sec_idx;
        const struct bpf_sec_def *sec_def;
        /* this program's instruction offset (in number of instructions)
         * within its containing ELF section
         */
        size_t sec_insn_off;
        /* number of original instructions in ELF section belonging to this
         * program, not taking into account subprogram instructions possibly
         * appended later during relocation
         */
        size_t sec_insn_cnt;
        /* Offset (in number of instructions) of the start of instructions
         * belonging to this BPF program within its containing main BPF
         * program. For the entry-point (main) BPF program, this is always
         * zero. For a sub-program, this gets reset before each main BPF
         * program is processed and relocated and is used to determine
         * whether a sub-program was already appended to the main program,
         * and if yes, at which instruction offset.
         */
        size_t sub_insn_off;

        /* instructions that belong to BPF program; insns[0] is located at
         * sec_insn_off instruction within its ELF section in ELF file, so
         * when mapping ELF file instruction index to the local instruction,
         * one needs to subtract sec_insn_off; and vice versa.
         */
        struct bpf_insn *insns;
        /* actual number of instructions in this BPF program's image; for
         * entry-point BPF programs this includes the size of the main
         * program itself plus all the used sub-programs, appended at the end
         */
        size_t insns_cnt;

        struct reloc_desc *reloc_desc;
        int nr_reloc;

        /* BPF verifier log settings */
        char *log_buf;
        size_t log_size;
        __u32 log_level;

        struct bpf_object *obj;

        int fd;
        bool autoload;
        bool autoattach;
        bool mark_btf_static;
        enum bpf_prog_type type;
        enum bpf_attach_type expected_attach_type;

        int prog_ifindex;
        __u32 attach_btf_obj_fd;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;

        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};

struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in the BPF program's BTF format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops).
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

struct bpf_map_def {
        unsigned int type;
        unsigned int key_size;
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
};

struct bpf_map {
        struct bpf_object *obj;
        char *name;
        /* real_name is defined for special internal maps (.rodata*,
         * .data*, .bss, .kconfig) and preserves their original ELF section
         * name. This is important to be able to find corresponding BTF
         * DATASEC information.
         */
        char *real_name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
        bool autocreate;
        __u64 map_extra;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_KCFG,
        EXT_KSYM,
};

enum kcfg_type {
        KCFG_UNKNOWN,
        KCFG_CHAR,
        KCFG_BOOL,
        KCFG_INT,
        KCFG_TRISTATE,
        KCFG_CHAR_ARR,
};
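
/* On the BPF side, Kconfig externs look like (illustrative):
 *
 *	extern int CONFIG_HZ __kconfig;
 *	extern enum libbpf_tristate CONFIG_NET_CLS_BPF __kconfig __weak;
 *
 * CONFIG_HZ resolves to KCFG_INT and the tristate to KCFG_TRISTATE;
 * values are read from /proc/config.gz (or /boot/config-$(uname -r))
 * when the object is loaded.
 */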

struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        bool is_set;
        bool is_weak;
        union {
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                struct {
                        unsigned long long addr;

                        /* target btf_id of the corresponding kernel var. */
                        int kernel_btf_obj_fd;
                        int kernel_btf_id;

                        /* local btf_id of the ksym extern's type. */
                        __u32 type_id;
                        /* BTF fd index to be patched in for insn->off; this is
                         * 0 for vmlinux BTF, or the index in obj->fd_array for
                         * module BTF
                         */
                        __s16 btf_fd_idx;
                } ksym;
        };
};

struct module_btf {
        struct btf *btf;
        char *name;
        __u32 id;
        int fd;
        int fd_array_idx;
};

enum sec_type {
        SEC_UNUSED = 0,
        SEC_RELO,
        SEC_BSS,
        SEC_DATA,
        SEC_RODATA,
};

struct elf_sec_desc {
        enum sec_type sec_type;
        Elf64_Shdr *shdr;
        Elf_Data *data;
};

struct elf_state {
        int fd;
        const void *obj_buf;
        size_t obj_buf_sz;
        Elf *elf;
        Elf64_Ehdr *ehdr;
        Elf_Data *symbols;
        Elf_Data *st_ops_data;
        Elf_Data *st_ops_link_data;
        size_t shstrndx; /* section index for section name strings */
        size_t strtabidx;
        struct elf_sec_desc *secs;
        size_t sec_cnt;
        int btf_maps_shndx;
        __u32 btf_maps_sec_btf_id;
        int text_shndx;
        int symbols_shndx;
        int st_ops_shndx;
        int st_ops_link_shndx;
};

struct usdt_manager;

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;

        bool loaded;
        bool has_subcalls;
        bool has_rodata;

        struct bpf_gen *gen_loader;

        /* Information used when doing ELF-related work. Only valid if efile.elf is not NULL */
        struct elf_state efile;

        struct btf *btf;
        struct btf_ext *btf_ext;

        /* Parsed and loaded vmlinux BTF, if any of the programs in the
         * object need it at load time.
         */
        struct btf *btf_vmlinux;
        /* Path to the custom BTF to be used for BPF CO-RE relocations as an
         * override for vmlinux BTF.
         */
        char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
        struct module_btf *btf_modules;
        bool btf_modules_loaded;
        size_t btf_module_cnt;
        size_t btf_module_cap;

        /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
        char *log_buf;
        size_t log_size;
        __u32 log_level;

        int *fd_array;
        size_t fd_array_cap;
        size_t fd_array_cnt;

        struct usdt_manager *usdt_man;

        char path[];
};

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);

void bpf_program__unload(struct bpf_program *prog)
{
        if (!prog)
                return;

        zclose(prog->fd);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->sec_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->sec_idx = -1;
}

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
        return BPF_CLASS(insn->code) == BPF_JMP &&
               BPF_OP(insn->code) == BPF_CALL &&
               BPF_SRC(insn->code) == BPF_K &&
               insn->src_reg == BPF_PSEUDO_CALL &&
               insn->dst_reg == 0 &&
               insn->off == 0;
}

static bool is_call_insn(const struct bpf_insn *insn)
{
        return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
        return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}
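
/* For reference: a subprog call and a helper call share the
 * BPF_JMP | BPF_CALL opcode and differ in src_reg:
 *
 *	src_reg == BPF_PSEUDO_CALL, imm == <insn delta>  -> BPF subprog call
 *	src_reg == 0,               imm == <helper ID>   -> kernel helper call
 *
 * while a ldimm64 insn with src_reg == BPF_PSEUDO_FUNC loads a
 * subprogram's address, e.g. for bpf_for_each_map_elem() callbacks.
 */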

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                      const char *name, size_t sec_idx, const char *sec_name,
                      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
        if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
                pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
                        sec_name, name, sec_off, insn_data_sz);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));
        prog->obj = obj;

        prog->sec_idx = sec_idx;
        prog->sec_insn_off = sec_off / BPF_INSN_SZ;
        prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
        /* insns_cnt can later be increased by appending used subprograms */
        prog->insns_cnt = prog->sec_insn_cnt;

        prog->type = BPF_PROG_TYPE_UNSPEC;
        prog->fd = -1;

        /* libbpf's convention for SEC("?abc...") is that it's just like
         * SEC("abc...") but the corresponding bpf_program starts out with
         * autoload set to false.
         */
        if (sec_name[0] == '?') {
                prog->autoload = false;
                /* from now on forget there was ? in section name */
                sec_name++;
        } else {
                prog->autoload = true;
        }

        prog->autoattach = true;

        /* inherit object's log_level */
        prog->log_level = obj->log_level;

        prog->sec_name = strdup(sec_name);
        if (!prog->sec_name)
                goto errout;

        prog->name = strdup(name);
        if (!prog->name)
                goto errout;

        prog->insns = malloc(insn_data_sz);
        if (!prog->insns)
                goto errout;
        memcpy(prog->insns, insn_data, insn_data_sz);

        return 0;
errout:
        pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
        bpf_program__exit(prog);
        return -ENOMEM;
}
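
/* Illustrative use of the '?' convention (hypothetical program name):
 *
 *	SEC("?xdp")
 *	int xdp_prog(struct xdp_md *ctx) { return XDP_PASS; }
 *
 * starts out with autoload == false, and the caller can opt in before
 * bpf_object__load():
 *
 *	struct bpf_program *p;
 *
 *	p = bpf_object__find_program_by_name(obj, "xdp_prog");
 *	if (p)
 *		bpf_program__set_autoload(p, true);
 */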

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
                         const char *sec_name, int sec_idx)
{
        Elf_Data *symbols = obj->efile.symbols;
        struct bpf_program *prog, *progs;
        void *data = sec_data->d_buf;
        size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
        int nr_progs, err, i;
        const char *name;
        Elf64_Sym *sym;

        progs = obj->programs;
        nr_progs = obj->nr_programs;
        nr_syms = symbols->d_size / sizeof(Elf64_Sym);

        for (i = 0; i < nr_syms; i++) {
                sym = elf_sym_by_idx(obj, i);

                if (sym->st_shndx != sec_idx)
                        continue;
                if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
                        continue;

                prog_sz = sym->st_size;
                sec_off = sym->st_value;

                name = elf_sym_str(obj, sym->st_name);
                if (!name) {
                        pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_off + prog_sz > sec_sz) {
                        pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
                        pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
                        return -ENOTSUP;
                }

                pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
                         sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

                progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
                if (!progs) {
                        /*
                         * In this case the original obj->programs
                         * is still valid, so there is no need for special
                         * handling in bpf_object__close().
                         */
                        pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
                                sec_name, name);
                        return -ENOMEM;
                }
                obj->programs = progs;

                prog = &progs[nr_progs];

                err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
                                            sec_off, data + sec_off, prog_sz);
                if (err)
                        return err;

                /* if function is a global/weak symbol, but has restricted
                 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
                 * as static to enable more permissive BPF verification mode
                 * with more outside context available to BPF verifier
                 */
                if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL
                    && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
                        || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
                        prog->mark_btf_static = true;

                nr_progs++;
                obj->nr_programs = nr_progs;
        }

        return 0;
}

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (btf_member_bit_offset(t, i) == bit_offset)
                        return m;
        }

        return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
                    const char *name)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
                        return m;
        }

        return NULL;
}

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(const struct btf *btf, const char *tname,
                           const struct btf_type **type, __u32 *type_id,
                           const struct btf_type **vtype, __u32 *vtype_id,
                           const struct btf_member **data_member)
{
        const struct btf_type *kern_type, *kern_vtype;
        const struct btf_member *kern_data_member;
        __s32 kern_vtype_id, kern_type_id;
        __u32 i;

        kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
        if (kern_type_id < 0) {
                pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
                        tname);
                return kern_type_id;
        }
        kern_type = btf__type_by_id(btf, kern_type_id);

        /* Find the corresponding "map_value" type that will be used
         * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
         * find "struct bpf_struct_ops_tcp_congestion_ops" from the
         * btf_vmlinux.
         */
        kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
                                                tname, BTF_KIND_STRUCT);
        if (kern_vtype_id < 0) {
                pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
                        STRUCT_OPS_VALUE_PREFIX, tname);
                return kern_vtype_id;
        }
        kern_vtype = btf__type_by_id(btf, kern_vtype_id);

        /* Find "struct tcp_congestion_ops" from
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [ ... ]
         *      struct tcp_congestion_ops data;
         * }
         */
        kern_data_member = btf_members(kern_vtype);
        for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
                if (kern_data_member->type == kern_type_id)
                        break;
        }
        if (i == btf_vlen(kern_vtype)) {
                pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
                        tname, STRUCT_OPS_VALUE_PREFIX, tname);
                return -EINVAL;
        }

        *type = kern_type;
        *type_id = kern_type_id;
        *vtype = kern_vtype;
        *vtype_id = kern_vtype_id;
        *data_member = kern_data_member;

        return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
        return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map,
                                         const struct btf *btf,
                                         const struct btf *kern_btf)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_struct_ops *st_ops;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(kern_btf, tname,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        kern_data_off = kern_data_member->offset / 8;
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        prog = st_ops->progs[i];
                        if (!prog)
                                continue;

                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);

                        /* mtype->type must be a func_proto which was
                         * guaranteed in bpf_object__collect_st_ops_relos(),
                         * so only check kern_mtype for func_proto here.
                         */
                        if (!btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        size_t i;
        int err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!bpf_map__is_struct_ops(map))
                        continue;

                err = bpf_map__init_kern_struct_ops(map, obj->btf,
                                                    obj->btf_vmlinux);
                if (err)
                        return err;
        }

        return 0;
}

static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
                                int shndx, Elf_Data *data, __u32 map_flags)
{
        const struct btf_type *type, *datasec;
        const struct btf_var_secinfo *vsi;
        struct bpf_struct_ops *st_ops;
        const char *tname, *var_name;
        __s32 type_id, datasec_id;
        const struct btf *btf;
        struct bpf_map *map;
        __u32 i;

        if (shndx == -1)
                return 0;

        btf = obj->btf;
        datasec_id = btf__find_by_name_kind(btf, sec_name,
                                            BTF_KIND_DATASEC);
        if (datasec_id < 0) {
                pr_warn("struct_ops init: DATASEC %s not found\n",
                        sec_name);
                return -EINVAL;
        }

        datasec = btf__type_by_id(btf, datasec_id);
        vsi = btf_var_secinfos(datasec);
        for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
                type = btf__type_by_id(obj->btf, vsi->type);
                var_name = btf__name_by_offset(obj->btf, type->name_off);

                type_id = btf__resolve_type(obj->btf, vsi->type);
                if (type_id < 0) {
                        pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
                                vsi->type, sec_name);
                        return -EINVAL;
                }

                type = btf__type_by_id(obj->btf, type_id);
                tname = btf__name_by_offset(obj->btf, type->name_off);
                if (!tname[0]) {
                        pr_warn("struct_ops init: anonymous type is not supported\n");
                        return -ENOTSUP;
                }
                if (!btf_is_struct(type)) {
                        pr_warn("struct_ops init: %s is not a struct\n", tname);
                        return -EINVAL;
                }

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map->sec_idx = shndx;
                map->sec_offset = vsi->offset;
                map->name = strdup(var_name);
                if (!map->name)
                        return -ENOMEM;

                map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
                map->def.key_size = sizeof(int);
                map->def.value_size = type->size;
                map->def.max_entries = 1;
                map->def.map_flags = map_flags;

                map->st_ops = calloc(1, sizeof(*map->st_ops));
                if (!map->st_ops)
                        return -ENOMEM;
                st_ops = map->st_ops;
                st_ops->data = malloc(type->size);
                st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
                st_ops->kern_func_off = malloc(btf_vlen(type) *
                                               sizeof(*st_ops->kern_func_off));
                if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
                        return -ENOMEM;

                if (vsi->offset + type->size > data->d_size) {
                        pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
                                var_name, sec_name);
                        return -EINVAL;
                }

                memcpy(st_ops->data,
                       data->d_buf + vsi->offset,
                       type->size);
                st_ops->tname = tname;
                st_ops->type = type;
                st_ops->type_id = type_id;

                pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
                         tname, type_id, var_name, vsi->offset);
        }

        return 0;
}
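
/* Illustrative BPF-side declaration that lands in ".struct_ops"
 * (modeled on the bpf_dctcp selftest):
 *
 *	SEC(".struct_ops")
 *	struct tcp_congestion_ops dctcp = {
 *		.init	= (void *)dctcp_init,
 *		.name	= "bpf_dctcp",
 *	};
 *
 * Each such variable becomes one BPF_MAP_TYPE_STRUCT_OPS map named after
 * the variable ("dctcp"), with value_size taken from the object's own
 * BTF; ".struct_ops.link" is handled identically except that it sets
 * BPF_F_LINK.
 */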

static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
        int err;

        err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
                                   obj->efile.st_ops_data, 0);
        err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
                                          obj->efile.st_ops_link_shndx,
                                          obj->efile.st_ops_link_data,
                                          BPF_F_LINK);
        return err;
}

static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
        } else {
                /* Use the GNU version of basename(), which doesn't modify its argument. */
                libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection to return
         * obj_buf to the user. Otherwise the buffer would have to be
         * duplicated to avoid the user freeing it before ELF processing
         * is finished.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.st_ops_shndx = -1;
        obj->efile.st_ops_link_shndx = -1;
        obj->kconfig_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        return obj;
}

static void bpf_object__elf_finish(struct bpf_object *obj)
{
        if (!obj->efile.elf)
                return;

        elf_end(obj->efile.elf);
        obj->efile.elf = NULL;
        obj->efile.symbols = NULL;
        obj->efile.st_ops_data = NULL;
        obj->efile.st_ops_link_data = NULL;

        zfree(&obj->efile.secs);
        obj->efile.sec_cnt = 0;
        zclose(obj->efile.fd);
        obj->efile.obj_buf = NULL;
        obj->efile.obj_buf_sz = 0;
}

static int bpf_object__elf_init(struct bpf_object *obj)
{
        Elf64_Ehdr *ehdr;
        int err = 0;
        Elf *elf;

        if (obj->efile.elf) {
                pr_warn("elf: init internal error\n");
                return -LIBBPF_ERRNO__LIBELF;
        }

        if (obj->efile.obj_buf_sz > 0) {
                /* obj_buf should have been validated by bpf_object__open_mem(). */
                elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
        } else {
                obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
                if (obj->efile.fd < 0) {
                        char errmsg[STRERR_BUFSIZE], *cp;

                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
                        return err;
                }

                elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
        }

        if (!elf) {
                pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__LIBELF;
                goto errout;
        }

        obj->efile.elf = elf;

        if (elf_kind(elf) != ELF_K_ELF) {
                err = -LIBBPF_ERRNO__FORMAT;
                pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
                goto errout;
        }

        if (gelf_getclass(elf) != ELFCLASS64) {
                err = -LIBBPF_ERRNO__FORMAT;
                pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
                goto errout;
        }

        obj->efile.ehdr = ehdr = elf64_getehdr(elf);
        if (!obj->efile.ehdr) {
                pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
                pr_warn("elf: failed to get section names section index for %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* ELF is corrupted/truncated, avoid calling elf_strptr. */
        if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
                pr_warn("elf: failed to get section names strings from %s: %s\n",
                        obj->path, elf_errmsg(-1));
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        /* Old LLVM set e_machine to EM_NONE */
        if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
                pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
                err = -LIBBPF_ERRNO__FORMAT;
                goto errout;
        }

        return 0;
errout:
        bpf_object__elf_finish(obj);
        return err;
}

static int bpf_object__check_endianness(struct bpf_object *obj)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
                return 0;
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
                return 0;
#else
# error "Unrecognized __BYTE_ORDER__"
#endif
        pr_warn("elf: endianness mismatch in %s.\n", obj->path);
        return -LIBBPF_ERRNO__ENDIAN;
}

static int
bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
{
        if (!data) {
                pr_warn("invalid license section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        /* libbpf_strlcpy() copies only the first N - 1 bytes, so passing
         * size + 1 won't read past the end of the allowed ELF data section
         * buffer
         */
        libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
        pr_debug("license of %s is %s\n", obj->path, obj->license);
        return 0;
}
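
/* The license data typically comes from a BPF-side declaration like
 *
 *	char LICENSE[] SEC("license") = "Dual BSD/GPL";
 *
 * which the kernel later consults when gating GPL-only helpers.
 */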

static int
bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
{
        __u32 kver;

        if (!data || size != sizeof(kver)) {
                pr_warn("invalid kver section in %s\n", obj->path);
                return -LIBBPF_ERRNO__FORMAT;
        }
        memcpy(&kver, data, sizeof(kver));
        obj->kern_version = kver;
        pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
        return 0;
}
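
/* Historically this section was emitted by a BPF-side
 *
 *	__u32 _version SEC("version") = LINUX_VERSION_CODE;
 *
 * which kprobe programs required on pre-5.0 kernels; newer kernels no
 * longer enforce the exact value.
 */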

static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
            type == BPF_MAP_TYPE_HASH_OF_MAPS)
                return true;
        return false;
}

static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
{
        Elf_Data *data;
        Elf_Scn *scn;

        if (!name)
                return -EINVAL;

        scn = elf_sec_by_name(obj, name);
        data = elf_sec_data(obj, scn);
        if (data) {
                *size = data->d_size;
                return 0; /* found it */
        }

        return -ENOENT;
}

static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
{
        Elf_Data *symbols = obj->efile.symbols;
        const char *sname;
        size_t si;

        for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
                Elf64_Sym *sym = elf_sym_by_idx(obj, si);

                if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
                        continue;

                if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
                    ELF64_ST_BIND(sym->st_info) != STB_WEAK)
                        continue;

                sname = elf_sym_str(obj, sym->st_name);
                if (!sname) {
                        pr_warn("failed to get sym name string for var %s\n", name);
                        return ERR_PTR(-EIO);
                }
                if (strcmp(name, sname) == 0)
                        return sym;
        }

        return ERR_PTR(-ENOENT);
}

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
{
        struct bpf_map *map;
        int err;

        err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
                                sizeof(*obj->maps), obj->nr_maps + 1);
        if (err)
                return ERR_PTR(err);

        map = &obj->maps[obj->nr_maps++];
        map->obj = obj;
        map->fd = -1;
        map->inner_map_fd = -1;
        map->autocreate = true;

        return map;
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
        long page_sz = sysconf(_SC_PAGE_SIZE);
        size_t map_sz;

        map_sz = (size_t)roundup(map->def.value_size, 8) * map->def.max_entries;
        map_sz = roundup(map_sz, page_sz);
        return map_sz;
}
1512
1513 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1514 {
1515         char map_name[BPF_OBJ_NAME_LEN], *p;
1516         int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1517
1518         /* This is one of the more confusing parts of libbpf for various
1519          * reasons, some of which are historical. The original idea for naming
1520          * internal maps was to include as much of the BPF object name prefix as
1521          * possible, so that it can be distinguished from similar internal
1522          * maps of a different BPF object.
1523          * As an example, let's say we have bpf_object named 'my_object_name'
1524          * and internal map corresponding to '.rodata' ELF section. The final
1525          * map name advertised to user and to the kernel will be
1526          * 'my_objec.rodata', taking first 8 characters of object name and
1527          * entire 7 characters of '.rodata'.
1528          * Somewhat confusingly, if internal map ELF section name is shorter
1529          * than 7 characters, e.g., '.bss', we still reserve 7 characters
1530          * for the suffix, even though we only have 4 actual characters, and
1531          * resulting map will be called 'my_objec.bss', not even using all 15
1532          * characters allowed by the kernel. Oh well, at least the truncated
1533          * object name is somewhat consistent in this case. But if the map
1534          * name is '.kconfig', we'll still have entirety of '.kconfig' added
1535          * (8 chars) and thus will be left with only first 7 characters of the
1536          * object name ('my_obje'). Happy guessing, user, that the final map
1537          * name will be "my_obje.kconfig".
1538          * Now, with libbpf starting to support arbitrarily named .rodata.*
1539          * and .data.* data sections, it's possible that ELF section name is
1540          * longer than allowed 15 chars, so we now need to be careful to take
1541          * only up to 15 first characters of ELF name, taking no BPF object
1542          * name characters at all. So '.rodata.abracadabra' will result in
1543          * '.rodata.abracad' kernel and user-visible name.
1544          * We need to keep this convoluted logic intact for .data, .bss and
1545          * .rodata maps, but for new custom .data.custom and .rodata.custom
1546          * maps we use their ELF names as is, not prepending bpf_object name
1547          * in front. We still need to truncate them to 15 characters for the
1548          * kernel. Full name can be recovered for such maps by using DATASEC
1549          * BTF type associated with such map's value type, though.
1550          */
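        /* Worked examples of the rules described above, for quick reference:
         *
         *   obj->name          ELF section name         final map name
         *   "my_object_name"   ".rodata"             -> "my_objec.rodata"
         *   "my_object_name"   ".bss"                -> "my_objec.bss"
         *   "my_object_name"   ".kconfig"            -> "my_obje.kconfig"
         *   (any)              ".rodata.abracadabra" -> ".rodata.abracad"
         */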
1551         if (sfx_len >= BPF_OBJ_NAME_LEN)
1552                 sfx_len = BPF_OBJ_NAME_LEN - 1;
1553
1554         /* if there are two or more dots in map name, it's a custom dot map */
1555         if (strchr(real_name + 1, '.') != NULL)
1556                 pfx_len = 0;
1557         else
1558                 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1559
1560         snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1561                  sfx_len, real_name);
1562
1563         /* sanitise map name to characters allowed by kernel */
1564         for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1565                 if (!isalnum(*p) && *p != '_' && *p != '.')
1566                         *p = '_';
1567
1568         return strdup(map_name);
1569 }
1570
1571 static int
1572 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1573
1574 /* Internal BPF map is mmap()'able only if at least one of corresponding
1575  * DATASEC's VARs are to be exposed through BPF skeleton. I.e., it's a GLOBAL
1576  * variable and it's not marked as __hidden (which turns it into, effectively,
1577  * a STATIC variable).
1578  */
1579 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1580 {
1581         const struct btf_type *t, *vt;
1582         struct btf_var_secinfo *vsi;
1583         int i, n;
1584
1585         if (!map->btf_value_type_id)
1586                 return false;
1587
1588         t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1589         if (!btf_is_datasec(t))
1590                 return false;
1591
1592         vsi = btf_var_secinfos(t);
1593         for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1594                 vt = btf__type_by_id(obj->btf, vsi->type);
1595                 if (!btf_is_var(vt))
1596                         continue;
1597
1598                 if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1599                         return true;
1600         }
1601
1602         return false;
1603 }
1604
1605 static int
1606 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1607                               const char *real_name, int sec_idx, void *data, size_t data_sz)
1608 {
1609         struct bpf_map_def *def;
1610         struct bpf_map *map;
1611         int err;
1612
1613         map = bpf_object__add_map(obj);
1614         if (IS_ERR(map))
1615                 return PTR_ERR(map);
1616
1617         map->libbpf_type = type;
1618         map->sec_idx = sec_idx;
1619         map->sec_offset = 0;
1620         map->real_name = strdup(real_name);
1621         map->name = internal_map_name(obj, real_name);
1622         if (!map->real_name || !map->name) {
1623                 zfree(&map->real_name);
1624                 zfree(&map->name);
1625                 return -ENOMEM;
1626         }
1627
1628         def = &map->def;
1629         def->type = BPF_MAP_TYPE_ARRAY;
1630         def->key_size = sizeof(int);
1631         def->value_size = data_sz;
1632         def->max_entries = 1;
1633         def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1634                          ? BPF_F_RDONLY_PROG : 0;
1635
1636         /* failures are fine because of maps like .rodata.str1.1 */
1637         (void) map_fill_btf_type_info(obj, map);
1638
1639         if (map_is_mmapable(obj, map))
1640                 def->map_flags |= BPF_F_MMAPABLE;
1641
1642         pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1643                  map->name, map->sec_idx, map->sec_offset, def->map_flags);
1644
1645         map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE,
1646                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1647         if (map->mmaped == MAP_FAILED) {
1648                 err = -errno;
1649                 map->mmaped = NULL;
1650                 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1651                         map->name, err);
1652                 zfree(&map->real_name);
1653                 zfree(&map->name);
1654                 return err;
1655         }
1656
1657         if (data)
1658                 memcpy(map->mmaped, data, data_sz);
1659
1660         pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1661         return 0;
1662 }
1663
1664 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1665 {
1666         struct elf_sec_desc *sec_desc;
1667         const char *sec_name;
1668         int err = 0, sec_idx;
1669
1670         /*
1671          * Populate obj->maps with libbpf internal maps.
1672          */
1673         for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1674                 sec_desc = &obj->efile.secs[sec_idx];
1675
1676                 /* Skip recognized sections with size 0. */
1677                 if (!sec_desc->data || sec_desc->data->d_size == 0)
1678                         continue;
1679
1680                 switch (sec_desc->sec_type) {
1681                 case SEC_DATA:
1682                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1683                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1684                                                             sec_name, sec_idx,
1685                                                             sec_desc->data->d_buf,
1686                                                             sec_desc->data->d_size);
1687                         break;
1688                 case SEC_RODATA:
1689                         obj->has_rodata = true;
1690                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1691                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1692                                                             sec_name, sec_idx,
1693                                                             sec_desc->data->d_buf,
1694                                                             sec_desc->data->d_size);
1695                         break;
1696                 case SEC_BSS:
1697                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1698                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1699                                                             sec_name, sec_idx,
1700                                                             NULL,
1701                                                             sec_desc->data->d_size);
1702                         break;
1703                 default:
1704                         /* skip */
1705                         break;
1706                 }
1707                 if (err)
1708                         return err;
1709         }
1710         return 0;
1711 }
1712
1714 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1715                                                const void *name)
1716 {
1717         int i;
1718
1719         for (i = 0; i < obj->nr_extern; i++) {
1720                 if (strcmp(obj->externs[i].name, name) == 0)
1721                         return &obj->externs[i];
1722         }
1723         return NULL;
1724 }
1725
1726 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1727                               char value)
1728 {
1729         switch (ext->kcfg.type) {
1730         case KCFG_BOOL:
1731                 if (value == 'm') {
1732                         pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
1733                                 ext->name, value);
1734                         return -EINVAL;
1735                 }
1736                 *(bool *)ext_val = value == 'y' ? true : false;
1737                 break;
1738         case KCFG_TRISTATE:
1739                 if (value == 'y')
1740                         *(enum libbpf_tristate *)ext_val = TRI_YES;
1741                 else if (value == 'm')
1742                         *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1743                 else /* value == 'n' */
1744                         *(enum libbpf_tristate *)ext_val = TRI_NO;
1745                 break;
1746         case KCFG_CHAR:
1747                 *(char *)ext_val = value;
1748                 break;
1749         case KCFG_UNKNOWN:
1750         case KCFG_INT:
1751         case KCFG_CHAR_ARR:
1752         default:
1753                 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
1754                         ext->name, value);
1755                 return -EINVAL;
1756         }
1757         ext->is_set = true;
1758         return 0;
1759 }
1760
1761 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1762                               const char *value)
1763 {
1764         size_t len;
1765
1766         if (ext->kcfg.type != KCFG_CHAR_ARR) {
1767                 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
1768                         ext->name, value);
1769                 return -EINVAL;
1770         }
1771
1772         len = strlen(value);
1773         if (value[len - 1] != '"') {
1774                 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1775                         ext->name, value);
1776                 return -EINVAL;
1777         }
1778
1779         /* strip quotes */
1780         len -= 2;
1781         if (len >= ext->kcfg.sz) {
1782                 pr_warn("extern (kcfg) '%s': long string '%s' (%zu bytes) truncated to %d bytes\n",
1783                         ext->name, value, len, ext->kcfg.sz - 1);
1784                 len = ext->kcfg.sz - 1;
1785         }
1786         memcpy(ext_val, value + 1, len);
1787         ext_val[len] = '\0';
1788         ext->is_set = true;
1789         return 0;
1790 }
1791
1792 static int parse_u64(const char *value, __u64 *res)
1793 {
1794         char *value_end;
1795         int err;
1796
1797         errno = 0;
1798         *res = strtoull(value, &value_end, 0);
1799         if (errno) {
1800                 err = -errno;
1801                 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1802                 return err;
1803         }
1804         if (*value_end) {
1805                 pr_warn("failed to parse '%s' as integer completely\n", value);
1806                 return -EINVAL;
1807         }
1808         return 0;
1809 }
1810
1811 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1812 {
1813         int bit_sz = ext->kcfg.sz * 8;
1814
1815         if (ext->kcfg.sz == 8)
1816                 return true;
1817
1818         /* Validate that the value stored in u64 fits in an integer of
1819          * `ext->kcfg.sz` bytes without any loss of information. If the target
1820          * integer is signed, we rely on the following limits of an integer
1821          * type of Y bits and the subsequent transformation:
1822          *
1823          *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
1824          *            0 <= X + 2^(Y-1) <= 2^Y - 1
1825          *            0 <= X + 2^(Y-1) <  2^Y
1826          *
1827          *  For unsigned target integer, check that all the (64 - Y) bits are
1828          *  zero.
1829          */
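        /* Worked example: for a signed 1-byte target (Y = 8), any X in
         * [-128, 127] yields X + 128 in [0, 255] < 2^8 (the 64-bit addition
         * wraps sign-extended negatives back into this window), while any
         * out-of-range value lands at or above 2^8 and fails the check below.
         */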
1830         if (ext->kcfg.is_signed)
1831                 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1832         else
1833                 return (v >> bit_sz) == 0;
1834 }
1835
1836 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1837                               __u64 value)
1838 {
1839         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
1840             ext->kcfg.type != KCFG_BOOL) {
1841                 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
1842                         ext->name, (unsigned long long)value);
1843                 return -EINVAL;
1844         }
1845         if (ext->kcfg.type == KCFG_BOOL && value > 1) {
1846                 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
1847                         ext->name, (unsigned long long)value);
1848                 return -EINVAL;
1849
1850         }
1851         if (!is_kcfg_value_in_range(ext, value)) {
1852                 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
1853                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1854                 return -ERANGE;
1855         }
1856         switch (ext->kcfg.sz) {
1857         case 1:
1858                 *(__u8 *)ext_val = value;
1859                 break;
1860         case 2:
1861                 *(__u16 *)ext_val = value;
1862                 break;
1863         case 4:
1864                 *(__u32 *)ext_val = value;
1865                 break;
1866         case 8:
1867                 *(__u64 *)ext_val = value;
1868                 break;
1869         default:
1870                 return -EINVAL;
1871         }
1872         ext->is_set = true;
1873         return 0;
1874 }
1875
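/* Parse one line of Kconfig-style input and, if it names a known extern,
 * record its value at that extern's offset in the .kconfig map data.
 * Illustrative input lines (standard Kconfig formats, not from this file):
 *
 *   CONFIG_BPF_SYSCALL=y
 *   CONFIG_HZ=250
 *   CONFIG_DEFAULT_HOSTNAME="(none)"
 */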
1876 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1877                                             char *buf, void *data)
1878 {
1879         struct extern_desc *ext;
1880         char *sep, *value;
1881         int len, err = 0;
1882         void *ext_val;
1883         __u64 num;
1884
1885         if (!str_has_pfx(buf, "CONFIG_"))
1886                 return 0;
1887
1888         sep = strchr(buf, '=');
1889         if (!sep) {
1890                 pr_warn("failed to parse '%s': no separator\n", buf);
1891                 return -EINVAL;
1892         }
1893
1894         /* Trim ending '\n' */
1895         len = strlen(buf);
1896         if (buf[len - 1] == '\n')
1897                 buf[len - 1] = '\0';
1898         /* Split on '=' and ensure that a value is present. */
1899         *sep = '\0';
1900         if (!sep[1]) {
1901                 *sep = '=';
1902                 pr_warn("failed to parse '%s': no value\n", buf);
1903                 return -EINVAL;
1904         }
1905
1906         ext = find_extern_by_name(obj, buf);
1907         if (!ext || ext->is_set)
1908                 return 0;
1909
1910         ext_val = data + ext->kcfg.data_off;
1911         value = sep + 1;
1912
1913         switch (*value) {
1914         case 'y': case 'n': case 'm':
1915                 err = set_kcfg_value_tri(ext, ext_val, *value);
1916                 break;
1917         case '"':
1918                 err = set_kcfg_value_str(ext, ext_val, value);
1919                 break;
1920         default:
1921                 /* assume integer */
1922                 err = parse_u64(value, &num);
1923                 if (err) {
1924                         pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
1925                         return err;
1926                 }
1927                 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
1928                         pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
1929                         return -EINVAL;
1930                 }
1931                 err = set_kcfg_value_num(ext, ext_val, num);
1932                 break;
1933         }
1934         if (err)
1935                 return err;
1936         pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
1937         return 0;
1938 }
1939
1940 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
1941 {
1942         char buf[PATH_MAX];
1943         struct utsname uts;
1944         int len, err = 0;
1945         gzFile file;
1946
1947         uname(&uts);
1948         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
1949         if (len < 0)
1950                 return -EINVAL;
1951         else if (len >= PATH_MAX)
1952                 return -ENAMETOOLONG;
1953
1954         /* gzopen also accepts uncompressed files. */
1955         file = gzopen(buf, "r");
1956         if (!file)
1957                 file = gzopen("/proc/config.gz", "r");
1958
1959         if (!file) {
1960                 pr_warn("failed to open system Kconfig\n");
1961                 return -ENOENT;
1962         }
1963
1964         while (gzgets(file, buf, sizeof(buf))) {
1965                 err = bpf_object__process_kconfig_line(obj, buf, data);
1966                 if (err) {
1967                         pr_warn("error parsing system Kconfig line '%s': %d\n",
1968                                 buf, err);
1969                         goto out;
1970                 }
1971         }
1972
1973 out:
1974         gzclose(file);
1975         return err;
1976 }
1977
1978 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
1979                                         const char *config, void *data)
1980 {
1981         char buf[PATH_MAX];
1982         int err = 0;
1983         FILE *file;
1984
1985         file = fmemopen((void *)config, strlen(config), "r");
1986         if (!file) {
1987                 err = -errno;
1988                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
1989                 return err;
1990         }
1991
1992         while (fgets(buf, sizeof(buf), file)) {
1993                 err = bpf_object__process_kconfig_line(obj, buf, data);
1994                 if (err) {
1995                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
1996                                 buf, err);
1997                         break;
1998                 }
1999         }
2000
2001         fclose(file);
2002         return err;
2003 }
2004
2005 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2006 {
2007         struct extern_desc *last_ext = NULL, *ext;
2008         size_t map_sz;
2009         int i, err;
2010
2011         for (i = 0; i < obj->nr_extern; i++) {
2012                 ext = &obj->externs[i];
2013                 if (ext->type == EXT_KCFG)
2014                         last_ext = ext;
2015         }
2016
2017         if (!last_ext)
2018                 return 0;
2019
2020         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2021         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2022                                             ".kconfig", obj->efile.symbols_shndx,
2023                                             NULL, map_sz);
2024         if (err)
2025                 return err;
2026
2027         obj->kconfig_map_idx = obj->nr_maps - 1;
2028
2029         return 0;
2030 }
2031
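/* Resolve a BTF type ID to its underlying type, peeling off typedef and
 * modifier wrappers (const, volatile, restrict, etc.). If res_id is
 * non-NULL, it receives the type ID of the final resolved type.
 */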
2032 const struct btf_type *
2033 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2034 {
2035         const struct btf_type *t = btf__type_by_id(btf, id);
2036
2037         if (res_id)
2038                 *res_id = id;
2039
2040         while (btf_is_mod(t) || btf_is_typedef(t)) {
2041                 if (res_id)
2042                         *res_id = t->type;
2043                 t = btf__type_by_id(btf, t->type);
2044         }
2045
2046         return t;
2047 }
2048
2049 static const struct btf_type *
2050 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2051 {
2052         const struct btf_type *t;
2053
2054         t = skip_mods_and_typedefs(btf, id, NULL);
2055         if (!btf_is_ptr(t))
2056                 return NULL;
2057
2058         t = skip_mods_and_typedefs(btf, t->type, res_id);
2059
2060         return btf_is_func_proto(t) ? t : NULL;
2061 }
2062
2063 static const char *__btf_kind_str(__u16 kind)
2064 {
2065         switch (kind) {
2066         case BTF_KIND_UNKN: return "void";
2067         case BTF_KIND_INT: return "int";
2068         case BTF_KIND_PTR: return "ptr";
2069         case BTF_KIND_ARRAY: return "array";
2070         case BTF_KIND_STRUCT: return "struct";
2071         case BTF_KIND_UNION: return "union";
2072         case BTF_KIND_ENUM: return "enum";
2073         case BTF_KIND_FWD: return "fwd";
2074         case BTF_KIND_TYPEDEF: return "typedef";
2075         case BTF_KIND_VOLATILE: return "volatile";
2076         case BTF_KIND_CONST: return "const";
2077         case BTF_KIND_RESTRICT: return "restrict";
2078         case BTF_KIND_FUNC: return "func";
2079         case BTF_KIND_FUNC_PROTO: return "func_proto";
2080         case BTF_KIND_VAR: return "var";
2081         case BTF_KIND_DATASEC: return "datasec";
2082         case BTF_KIND_FLOAT: return "float";
2083         case BTF_KIND_DECL_TAG: return "decl_tag";
2084         case BTF_KIND_TYPE_TAG: return "type_tag";
2085         case BTF_KIND_ENUM64: return "enum64";
2086         default: return "unknown";
2087         }
2088 }
2089
2090 const char *btf_kind_str(const struct btf_type *t)
2091 {
2092         return __btf_kind_str(btf_kind(t));
2093 }
2094
2095 /*
2096  * Fetch integer attribute of BTF map definition. Such attributes are
2097  * represented using a pointer to an array, in which dimensionality of array
2098  * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2099  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2100  * type definition, while using only sizeof(void *) space in ELF data section.
2101  */
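/* For example, the __uint() convenience macro from bpf_helpers.h emits
 * exactly this encoding:
 *
 *   #define __uint(name, val) int (*name)[val]
 *
 * so __uint(max_entries, 1024) declares `int (*max_entries)[1024];`,
 * from which *res is recovered as the array's nelems (1024).
 */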
2102 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2103                               const struct btf_member *m, __u32 *res)
2104 {
2105         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2106         const char *name = btf__name_by_offset(btf, m->name_off);
2107         const struct btf_array *arr_info;
2108         const struct btf_type *arr_t;
2109
2110         if (!btf_is_ptr(t)) {
2111                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2112                         map_name, name, btf_kind_str(t));
2113                 return false;
2114         }
2115
2116         arr_t = btf__type_by_id(btf, t->type);
2117         if (!arr_t) {
2118                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2119                         map_name, name, t->type);
2120                 return false;
2121         }
2122         if (!btf_is_array(arr_t)) {
2123                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2124                         map_name, name, btf_kind_str(arr_t));
2125                 return false;
2126         }
2127         arr_info = btf_array(arr_t);
2128         *res = arr_info->nelems;
2129         return true;
2130 }
2131
2132 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2133 {
2134         int len;
2135
2136         len = snprintf(buf, buf_sz, "%s/%s", path, name);
2137         if (len < 0)
2138                 return -EINVAL;
2139         if (len >= buf_sz)
2140                 return -ENAMETOOLONG;
2141
2142         return 0;
2143 }
2144
2145 static int build_map_pin_path(struct bpf_map *map, const char *path)
2146 {
2147         char buf[PATH_MAX];
2148         int err;
2149
2150         if (!path)
2151                 path = "/sys/fs/bpf";
2152
2153         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2154         if (err)
2155                 return err;
2156
2157         return bpf_map__set_pin_path(map, buf);
2158 }
2159
2160 /* should match definition in bpf_helpers.h */
2161 enum libbpf_pin_type {
2162         LIBBPF_PIN_NONE,
2163         /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2164         LIBBPF_PIN_BY_NAME,
2165 };
2166
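/* An illustrative BTF-defined map declaration that this parser handles,
 * sketched with the __uint()/__type() macros from bpf_helpers.h (the map
 * and value names here are made up):
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_HASH);
 *           __uint(max_entries, 1024);
 *           __type(key, __u32);
 *           __type(value, struct my_value);
 *   } my_map SEC(".maps");
 */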
2167 int parse_btf_map_def(const char *map_name, struct btf *btf,
2168                       const struct btf_type *def_t, bool strict,
2169                       struct btf_map_def *map_def, struct btf_map_def *inner_def)
2170 {
2171         const struct btf_type *t;
2172         const struct btf_member *m;
2173         bool is_inner = inner_def == NULL;
2174         int vlen, i;
2175
2176         vlen = btf_vlen(def_t);
2177         m = btf_members(def_t);
2178         for (i = 0; i < vlen; i++, m++) {
2179                 const char *name = btf__name_by_offset(btf, m->name_off);
2180
2181                 if (!name) {
2182                         pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2183                         return -EINVAL;
2184                 }
2185                 if (strcmp(name, "type") == 0) {
2186                         if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2187                                 return -EINVAL;
2188                         map_def->parts |= MAP_DEF_MAP_TYPE;
2189                 } else if (strcmp(name, "max_entries") == 0) {
2190                         if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2191                                 return -EINVAL;
2192                         map_def->parts |= MAP_DEF_MAX_ENTRIES;
2193                 } else if (strcmp(name, "map_flags") == 0) {
2194                         if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2195                                 return -EINVAL;
2196                         map_def->parts |= MAP_DEF_MAP_FLAGS;
2197                 } else if (strcmp(name, "numa_node") == 0) {
2198                         if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2199                                 return -EINVAL;
2200                         map_def->parts |= MAP_DEF_NUMA_NODE;
2201                 } else if (strcmp(name, "key_size") == 0) {
2202                         __u32 sz;
2203
2204                         if (!get_map_field_int(map_name, btf, m, &sz))
2205                                 return -EINVAL;
2206                         if (map_def->key_size && map_def->key_size != sz) {
2207                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
2208                                         map_name, map_def->key_size, sz);
2209                                 return -EINVAL;
2210                         }
2211                         map_def->key_size = sz;
2212                         map_def->parts |= MAP_DEF_KEY_SIZE;
2213                 } else if (strcmp(name, "key") == 0) {
2214                         __s64 sz;
2215
2216                         t = btf__type_by_id(btf, m->type);
2217                         if (!t) {
2218                                 pr_warn("map '%s': key type [%d] not found.\n",
2219                                         map_name, m->type);
2220                                 return -EINVAL;
2221                         }
2222                         if (!btf_is_ptr(t)) {
2223                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2224                                         map_name, btf_kind_str(t));
2225                                 return -EINVAL;
2226                         }
2227                         sz = btf__resolve_size(btf, t->type);
2228                         if (sz < 0) {
2229                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2230                                         map_name, t->type, (ssize_t)sz);
2231                                 return sz;
2232                         }
2233                         if (map_def->key_size && map_def->key_size != sz) {
2234                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2235                                         map_name, map_def->key_size, (ssize_t)sz);
2236                                 return -EINVAL;
2237                         }
2238                         map_def->key_size = sz;
2239                         map_def->key_type_id = t->type;
2240                         map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2241                 } else if (strcmp(name, "value_size") == 0) {
2242                         __u32 sz;
2243
2244                         if (!get_map_field_int(map_name, btf, m, &sz))
2245                                 return -EINVAL;
2246                         if (map_def->value_size && map_def->value_size != sz) {
2247                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2248                                         map_name, map_def->value_size, sz);
2249                                 return -EINVAL;
2250                         }
2251                         map_def->value_size = sz;
2252                         map_def->parts |= MAP_DEF_VALUE_SIZE;
2253                 } else if (strcmp(name, "value") == 0) {
2254                         __s64 sz;
2255
2256                         t = btf__type_by_id(btf, m->type);
2257                         if (!t) {
2258                                 pr_warn("map '%s': value type [%d] not found.\n",
2259                                         map_name, m->type);
2260                                 return -EINVAL;
2261                         }
2262                         if (!btf_is_ptr(t)) {
2263                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2264                                         map_name, btf_kind_str(t));
2265                                 return -EINVAL;
2266                         }
2267                         sz = btf__resolve_size(btf, t->type);
2268                         if (sz < 0) {
2269                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2270                                         map_name, t->type, (ssize_t)sz);
2271                                 return sz;
2272                         }
2273                         if (map_def->value_size && map_def->value_size != sz) {
2274                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2275                                         map_name, map_def->value_size, (ssize_t)sz);
2276                                 return -EINVAL;
2277                         }
2278                         map_def->value_size = sz;
2279                         map_def->value_type_id = t->type;
2280                         map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2281                 } else if (strcmp(name, "values") == 0) {
2283                         bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2284                         bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2285                         const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2286                         char inner_map_name[128];
2287                         int err;
2288
2289                         if (is_inner) {
2290                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2291                                         map_name);
2292                                 return -ENOTSUP;
2293                         }
2294                         if (i != vlen - 1) {
2295                                 pr_warn("map '%s': '%s' member should be last.\n",
2296                                         map_name, name);
2297                                 return -EINVAL;
2298                         }
2299                         if (!is_map_in_map && !is_prog_array) {
2300                                 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2301                                         map_name);
2302                                 return -ENOTSUP;
2303                         }
2304                         if (map_def->value_size && map_def->value_size != 4) {
2305                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2306                                         map_name, map_def->value_size);
2307                                 return -EINVAL;
2308                         }
2309                         map_def->value_size = 4;
2310                         t = btf__type_by_id(btf, m->type);
2311                         if (!t) {
2312                                 pr_warn("map '%s': %s type [%d] not found.\n",
2313                                         map_name, desc, m->type);
2314                                 return -EINVAL;
2315                         }
2316                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2317                                 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2318                                         map_name, desc);
2319                                 return -EINVAL;
2320                         }
2321                         t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2322                         if (!btf_is_ptr(t)) {
2323                                 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2324                                         map_name, desc, btf_kind_str(t));
2325                                 return -EINVAL;
2326                         }
2327                         t = skip_mods_and_typedefs(btf, t->type, NULL);
2328                         if (is_prog_array) {
2329                                 if (!btf_is_func_proto(t)) {
2330                                         pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2331                                                 map_name, btf_kind_str(t));
2332                                         return -EINVAL;
2333                                 }
2334                                 continue;
2335                         }
2336                         if (!btf_is_struct(t)) {
2337                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2338                                         map_name, btf_kind_str(t));
2339                                 return -EINVAL;
2340                         }
2341
2342                         snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2343                         err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2344                         if (err)
2345                                 return err;
2346
2347                         map_def->parts |= MAP_DEF_INNER_MAP;
2348                 } else if (strcmp(name, "pinning") == 0) {
2349                         __u32 val;
2350
2351                         if (is_inner) {
2352                                 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2353                                 return -EINVAL;
2354                         }
2355                         if (!get_map_field_int(map_name, btf, m, &val))
2356                                 return -EINVAL;
2357                         if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2358                                 pr_warn("map '%s': invalid pinning value %u.\n",
2359                                         map_name, val);
2360                                 return -EINVAL;
2361                         }
2362                         map_def->pinning = val;
2363                         map_def->parts |= MAP_DEF_PINNING;
2364                 } else if (strcmp(name, "map_extra") == 0) {
2365                         __u32 map_extra;
2366
2367                         if (!get_map_field_int(map_name, btf, m, &map_extra))
2368                                 return -EINVAL;
2369                         map_def->map_extra = map_extra;
2370                         map_def->parts |= MAP_DEF_MAP_EXTRA;
2371                 } else {
2372                         if (strict) {
2373                                 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2374                                 return -ENOTSUP;
2375                         }
2376                         pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2377                 }
2378         }
2379
2380         if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2381                 pr_warn("map '%s': map type isn't specified.\n", map_name);
2382                 return -EINVAL;
2383         }
2384
2385         return 0;
2386 }
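/* For reference, an illustrative map-in-map declaration exercising the
 * "values" branch of parse_btf_map_def() above, sketched with the
 * __array() macro from bpf_helpers.h (names are made up):
 *
 *   struct {
 *           __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *           __uint(max_entries, 8);
 *           __type(key, int);
 *           __array(values, struct {
 *                   __uint(type, BPF_MAP_TYPE_ARRAY);
 *                   __uint(max_entries, 1);
 *                   __type(key, int);
 *                   __type(value, int);
 *           });
 *   } outer_map SEC(".maps");
 */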
2387
2388 static size_t adjust_ringbuf_sz(size_t sz)
2389 {
2390         __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2391         __u32 mul;
2392
2393         /* if user forgot to set any size, make sure they see error */
2394         if (sz == 0)
2395                 return 0;
2396         /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2397          * a power-of-2 multiple of the kernel's page size. If the user
2398          * already satisfied these conditions, pass the size through.
2399          */
2400         if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2401                 return sz;
2402
2403         /* Otherwise find closest (page_sz * power_of_2) product bigger than
2404          * user-set size to satisfy both user size request and kernel
2405          * requirements and substitute correct max_entries for map creation.
2406          */
2407         for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2408                 if (mul * page_sz > sz)
2409                         return mul * page_sz;
2410         }
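        /* Worked example (assuming a 4 KB page size): a requested size of
         * 10000 bytes is not a power-of-2 multiple of 4096, so the loop
         * above settles on 4 * 4096 = 16384, the smallest conforming size
         * that covers the request.
         */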
2411
2412         /* if it's impossible to satisfy the conditions (i.e., user size is
2413          * very close to UINT_MAX but is not a power-of-2 multiple of
2414          * page_size) then just return original size and let kernel reject it
2415          */
2416         return sz;
2417 }
2418
2419 static bool map_is_ringbuf(const struct bpf_map *map)
2420 {
2421         return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2422                map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2423 }
2424
2425 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2426 {
2427         map->def.type = def->map_type;
2428         map->def.key_size = def->key_size;
2429         map->def.value_size = def->value_size;
2430         map->def.max_entries = def->max_entries;
2431         map->def.map_flags = def->map_flags;
2432         map->map_extra = def->map_extra;
2433
2434         map->numa_node = def->numa_node;
2435         map->btf_key_type_id = def->key_type_id;
2436         map->btf_value_type_id = def->value_type_id;
2437
2438         /* auto-adjust BPF ringbuf map max_entries to be a power-of-2 multiple of page size */
2439         if (map_is_ringbuf(map))
2440                 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2441
2442         if (def->parts & MAP_DEF_MAP_TYPE)
2443                 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2444
2445         if (def->parts & MAP_DEF_KEY_TYPE)
2446                 pr_debug("map '%s': found key [%u], sz = %u.\n",
2447                          map->name, def->key_type_id, def->key_size);
2448         else if (def->parts & MAP_DEF_KEY_SIZE)
2449                 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2450
2451         if (def->parts & MAP_DEF_VALUE_TYPE)
2452                 pr_debug("map '%s': found value [%u], sz = %u.\n",
2453                          map->name, def->value_type_id, def->value_size);
2454         else if (def->parts & MAP_DEF_VALUE_SIZE)
2455                 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2456
2457         if (def->parts & MAP_DEF_MAX_ENTRIES)
2458                 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2459         if (def->parts & MAP_DEF_MAP_FLAGS)
2460                 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2461         if (def->parts & MAP_DEF_MAP_EXTRA)
2462                 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2463                          (unsigned long long)def->map_extra);
2464         if (def->parts & MAP_DEF_PINNING)
2465                 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2466         if (def->parts & MAP_DEF_NUMA_NODE)
2467                 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2468
2469         if (def->parts & MAP_DEF_INNER_MAP)
2470                 pr_debug("map '%s': found inner map definition.\n", map->name);
2471 }
2472
2473 static const char *btf_var_linkage_str(__u32 linkage)
2474 {
2475         switch (linkage) {
2476         case BTF_VAR_STATIC: return "static";
2477         case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2478         case BTF_VAR_GLOBAL_EXTERN: return "extern";
2479         default: return "unknown";
2480         }
2481 }
2482
2483 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2484                                          const struct btf_type *sec,
2485                                          int var_idx, int sec_idx,
2486                                          const Elf_Data *data, bool strict,
2487                                          const char *pin_root_path)
2488 {
2489         struct btf_map_def map_def = {}, inner_def = {};
2490         const struct btf_type *var, *def;
2491         const struct btf_var_secinfo *vi;
2492         const struct btf_var *var_extra;
2493         const char *map_name;
2494         struct bpf_map *map;
2495         int err;
2496
2497         vi = btf_var_secinfos(sec) + var_idx;
2498         var = btf__type_by_id(obj->btf, vi->type);
2499         var_extra = btf_var(var);
2500         map_name = btf__name_by_offset(obj->btf, var->name_off);
2501
2502         if (map_name == NULL || map_name[0] == '\0') {
2503                 pr_warn("map #%d: empty name.\n", var_idx);
2504                 return -EINVAL;
2505         }
2506         if ((__u64)vi->offset + vi->size > data->d_size) {
2507                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2508                 return -EINVAL;
2509         }
2510         if (!btf_is_var(var)) {
2511                 pr_warn("map '%s': unexpected var kind %s.\n",
2512                         map_name, btf_kind_str(var));
2513                 return -EINVAL;
2514         }
2515         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2516                 pr_warn("map '%s': unsupported map linkage %s.\n",
2517                         map_name, btf_var_linkage_str(var_extra->linkage));
2518                 return -EOPNOTSUPP;
2519         }
2520
2521         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2522         if (!btf_is_struct(def)) {
2523                 pr_warn("map '%s': unexpected def kind %s.\n",
2524                         map_name, btf_kind_str(def));
2525                 return -EINVAL;
2526         }
2527         if (def->size > vi->size) {
2528                 pr_warn("map '%s': invalid def size.\n", map_name);
2529                 return -EINVAL;
2530         }
2531
2532         map = bpf_object__add_map(obj);
2533         if (IS_ERR(map))
2534                 return PTR_ERR(map);
2535         map->name = strdup(map_name);
2536         if (!map->name) {
2537                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2538                 return -ENOMEM;
2539         }
2540         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2541         map->def.type = BPF_MAP_TYPE_UNSPEC;
2542         map->sec_idx = sec_idx;
2543         map->sec_offset = vi->offset;
2544         map->btf_var_idx = var_idx;
2545         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2546                  map_name, map->sec_idx, map->sec_offset);
2547
2548         err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2549         if (err)
2550                 return err;
2551
2552         fill_map_from_def(map, &map_def);
2553
2554         if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2555                 err = build_map_pin_path(map, pin_root_path);
2556                 if (err) {
2557                         pr_warn("map '%s': couldn't build pin path.\n", map->name);
2558                         return err;
2559                 }
2560         }
2561
2562         if (map_def.parts & MAP_DEF_INNER_MAP) {
2563                 map->inner_map = calloc(1, sizeof(*map->inner_map));
2564                 if (!map->inner_map)
2565                         return -ENOMEM;
2566                 map->inner_map->fd = -1;
2567                 map->inner_map->sec_idx = sec_idx;
2568                 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2569                 if (!map->inner_map->name)
2570                         return -ENOMEM;
2571                 sprintf(map->inner_map->name, "%s.inner", map_name);
2572
2573                 fill_map_from_def(map->inner_map, &inner_def);
2574         }
2575
2576         err = map_fill_btf_type_info(obj, map);
2577         if (err)
2578                 return err;
2579
2580         return 0;
2581 }
2582
2583 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2584                                           const char *pin_root_path)
2585 {
2586         const struct btf_type *sec = NULL;
2587         int nr_types, i, vlen, err;
2588         const struct btf_type *t;
2589         const char *name;
2590         Elf_Data *data;
2591         Elf_Scn *scn;
2592
2593         if (obj->efile.btf_maps_shndx < 0)
2594                 return 0;
2595
2596         scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2597         data = elf_sec_data(obj, scn);
2598         if (!scn || !data) {
2599                 pr_warn("elf: failed to get %s map definitions for %s\n",
2600                         MAPS_ELF_SEC, obj->path);
2601                 return -EINVAL;
2602         }
2603
2604         nr_types = btf__type_cnt(obj->btf);
2605         for (i = 1; i < nr_types; i++) {
2606                 t = btf__type_by_id(obj->btf, i);
2607                 if (!btf_is_datasec(t))
2608                         continue;
2609                 name = btf__name_by_offset(obj->btf, t->name_off);
2610                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2611                         sec = t;
2612                         obj->efile.btf_maps_sec_btf_id = i;
2613                         break;
2614                 }
2615         }
2616
2617         if (!sec) {
2618                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2619                 return -ENOENT;
2620         }
2621
2622         vlen = btf_vlen(sec);
2623         for (i = 0; i < vlen; i++) {
2624                 err = bpf_object__init_user_btf_map(obj, sec, i,
2625                                                     obj->efile.btf_maps_shndx,
2626                                                     data, strict,
2627                                                     pin_root_path);
2628                 if (err)
2629                         return err;
2630         }
2631
2632         return 0;
2633 }
2634
2635 static int bpf_object__init_maps(struct bpf_object *obj,
2636                                  const struct bpf_object_open_opts *opts)
2637 {
2638         const char *pin_root_path;
2639         bool strict;
2640         int err = 0;
2641
2642         strict = !OPTS_GET(opts, relaxed_maps, false);
2643         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2644
2645         err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2646         err = err ?: bpf_object__init_global_data_maps(obj);
2647         err = err ?: bpf_object__init_kconfig_map(obj);
2648         err = err ?: bpf_object_init_struct_ops(obj);
2649
2650         return err;
2651 }
2652
2653 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2654 {
2655         Elf64_Shdr *sh;
2656
2657         sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
2658         if (!sh)
2659                 return false;
2660
2661         return sh->sh_flags & SHF_EXECINSTR;
2662 }
2663
2664 static bool btf_needs_sanitization(struct bpf_object *obj)
2665 {
2666         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2667         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2668         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2669         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2670         bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2671         bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2672         bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2673
2674         return !has_func || !has_datasec || !has_func_global || !has_float ||
2675                !has_decl_tag || !has_type_tag || !has_enum64;
2676 }
2677
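/* Rewrite BTF kinds the running kernel does not understand into older,
 * universally supported ones (e.g., DATASEC -> STRUCT, FUNC_PROTO -> ENUM,
 * FLOAT -> equally-sized STRUCT), so that the rest of the BTF can still be
 * successfully loaded.
 */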
2678 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2679 {
2680         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2681         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2682         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2683         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2684         bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2685         bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2686         bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2687         int enum64_placeholder_id = 0;
2688         struct btf_type *t;
2689         int i, j, vlen;
2690
2691         for (i = 1; i < btf__type_cnt(btf); i++) {
2692                 t = (struct btf_type *)btf__type_by_id(btf, i);
2693
2694                 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
2695                         /* replace VAR/DECL_TAG with INT */
2696                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2697                         /*
2698                          * using size = 1 is the safest choice, 4 will be too
2699                          * big and cause kernel BTF validation failure if
2700                          * original variable took less than 4 bytes
2701                          */
2702                         t->size = 1;
2703                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2704                 } else if (!has_datasec && btf_is_datasec(t)) {
2705                         /* replace DATASEC with STRUCT */
2706                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
2707                         struct btf_member *m = btf_members(t);
2708                         struct btf_type *vt;
2709                         char *name;
2710
2711                         name = (char *)btf__name_by_offset(btf, t->name_off);
2712                         while (*name) {
2713                                 if (*name == '.')
2714                                         *name = '_';
2715                                 name++;
2716                         }
2717
2718                         vlen = btf_vlen(t);
2719                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2720                         for (j = 0; j < vlen; j++, v++, m++) {
2721                                 /* order of field assignments is important */
2722                                 m->offset = v->offset * 8;
2723                                 m->type = v->type;
2724                                 /* preserve variable name as member name */
2725                                 vt = (void *)btf__type_by_id(btf, v->type);
2726                                 m->name_off = vt->name_off;
2727                         }
2728                 } else if (!has_func && btf_is_func_proto(t)) {
2729                         /* replace FUNC_PROTO with ENUM */
2730                         vlen = btf_vlen(t);
2731                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2732                         t->size = sizeof(__u32); /* kernel enforced */
2733                 } else if (!has_func && btf_is_func(t)) {
2734                         /* replace FUNC with TYPEDEF */
2735                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2736                 } else if (!has_func_global && btf_is_func(t)) {
2737                         /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2738                         t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2739                 } else if (!has_float && btf_is_float(t)) {
2740                         /* replace FLOAT with an equally-sized empty STRUCT;
2741                          * since C compilers do not accept e.g. "float" as a
2742                          * valid struct name, make it anonymous
2743                          */
2744                         t->name_off = 0;
2745                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
2746                 } else if (!has_type_tag && btf_is_type_tag(t)) {
2747                         /* replace TYPE_TAG with a CONST */
2748                         t->name_off = 0;
2749                         t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
2750                 } else if (!has_enum64 && btf_is_enum(t)) {
2751                         /* clear the kflag */
2752                         t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
2753                 } else if (!has_enum64 && btf_is_enum64(t)) {
2754                         /* replace ENUM64 with a union */
2755                         struct btf_member *m;
2756
2757                         if (enum64_placeholder_id == 0) {
2758                                 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
2759                                 if (enum64_placeholder_id < 0)
2760                                         return enum64_placeholder_id;
2761
2762                                 t = (struct btf_type *)btf__type_by_id(btf, i);
2763                         }
2764
2765                         m = btf_members(t);
2766                         vlen = btf_vlen(t);
2767                         t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
2768                         for (j = 0; j < vlen; j++, m++) {
2769                                 m->type = enum64_placeholder_id;
2770                                 m->offset = 0;
2771                         }
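                        /* Net effect, as a sketch: each 64-bit enumerator
                         * becomes a union member that keeps its name (struct
                         * btf_enum64 and struct btf_member are the same size,
                         * with name_off in the same slot) but now points at
                         * the 1-byte 'enum64_placeholder' INT at offset 0.
                         */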
2772                 }
2773         }
2774
2775         return 0;
2776 }
2777
2778 static bool libbpf_needs_btf(const struct bpf_object *obj)
2779 {
2780         return obj->efile.btf_maps_shndx >= 0 ||
2781                obj->efile.st_ops_shndx >= 0 ||
2782                obj->efile.st_ops_link_shndx >= 0 ||
2783                obj->nr_extern > 0;
2784 }
2785
2786 static bool kernel_needs_btf(const struct bpf_object *obj)
2787 {
2788         return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0;
2789 }
2790
2791 static int bpf_object__init_btf(struct bpf_object *obj,
2792                                 Elf_Data *btf_data,
2793                                 Elf_Data *btf_ext_data)
2794 {
2795         int err = -ENOENT;
2796
2797         if (btf_data) {
2798                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2799                 err = libbpf_get_error(obj->btf);
2800                 if (err) {
2801                         obj->btf = NULL;
2802                         pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
2803                         goto out;
2804                 }
2805                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2806                 btf__set_pointer_size(obj->btf, 8);
2807         }
2808         if (btf_ext_data) {
2809                 struct btf_ext_info *ext_segs[3];
2810                 int seg_num, sec_num;
2811
2812                 if (!obj->btf) {
2813                         pr_debug("Ignoring ELF section %s because the ELF section %s it depends on was not found.\n",
2814                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2815                         goto out;
2816                 }
2817                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
2818                 err = libbpf_get_error(obj->btf_ext);
2819                 if (err) {
2820                         pr_warn("Error loading ELF section %s: %d. Ignoring it and continuing.\n",
2821                                 BTF_EXT_ELF_SEC, err);
2822                         obj->btf_ext = NULL;
2823                         goto out;
2824                 }
2825
2826                 /* setup .BTF.ext to ELF section mapping */
2827                 ext_segs[0] = &obj->btf_ext->func_info;
2828                 ext_segs[1] = &obj->btf_ext->line_info;
2829                 ext_segs[2] = &obj->btf_ext->core_relo_info;
2830                 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
2831                         struct btf_ext_info *seg = ext_segs[seg_num];
2832                         const struct btf_ext_info_sec *sec;
2833                         const char *sec_name;
2834                         Elf_Scn *scn;
2835
2836                         if (seg->sec_cnt == 0)
2837                                 continue;
2838
2839                         seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
2840                         if (!seg->sec_idxs) {
2841                                 err = -ENOMEM;
2842                                 goto out;
2843                         }
2844
2845                         sec_num = 0;
2846                         for_each_btf_ext_sec(seg, sec) {
2847                                 /* preventively increment index to avoid doing
2848                                  * this before every continue below
2849                                  */
2850                                 sec_num++;
2851
2852                                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
2853                                 if (str_is_empty(sec_name))
2854                                         continue;
2855                                 scn = elf_sec_by_name(obj, sec_name);
2856                                 if (!scn)
2857                                         continue;
2858
2859                                 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
2860                         }
2861                 }
2862         }
2863 out:
2864         if (err && libbpf_needs_btf(obj)) {
2865                 pr_warn("BTF is required, but is missing or corrupted.\n");
2866                 return err;
2867         }
2868         return 0;
2869 }
2870
2871 static int compare_vsi_off(const void *_a, const void *_b)
2872 {
2873         const struct btf_var_secinfo *a = _a;
2874         const struct btf_var_secinfo *b = _b;
2875
2876         return a->offset - b->offset;
2877 }
2878
2879 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
2880                              struct btf_type *t)
2881 {
2882         __u32 size = 0, i, vars = btf_vlen(t);
2883         const char *sec_name = btf__name_by_offset(btf, t->name_off);
2884         struct btf_var_secinfo *vsi;
2885         bool fixup_offsets = false;
2886         int err;
2887
2888         if (!sec_name) {
2889                 pr_debug("No name found in string section for DATASEC kind.\n");
2890                 return -ENOENT;
2891         }
2892
2893         /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
2894          * variable offsets set at the previous step. Further, not every
2895          * extern BTF VAR has a corresponding ELF symbol preserved, so we skip
2896          * all fixups altogether for such sections and go straight to sorting
2897          * VARs within their DATASEC.
2898          */
2899         if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
2900                 goto sort_vars;
2901
2902         /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
2903          * fix this up. But BPF static linker already fixes this up and fills
2904          * all the sizes and offsets during static linking. So this step has
2905          * to be optional. But the STV_HIDDEN handling is non-optional for any
2906          * non-extern DATASEC, so the variable fixup loop below handles both
2907          * tasks at the same time, paying the cost of BTF VAR <-> ELF
2908          * symbol matching just once.
2909          */
2910         if (t->size == 0) {
2911                 err = find_elf_sec_sz(obj, sec_name, &size);
2912                 if (err || !size) {
2913                         pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
2914                                  sec_name, size, err);
2915                         return -ENOENT;
2916                 }
2917
2918                 t->size = size;
2919                 fixup_offsets = true;
2920         }
2921
2922         for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
2923                 const struct btf_type *t_var;
2924                 struct btf_var *var;
2925                 const char *var_name;
2926                 Elf64_Sym *sym;
2927
2928                 t_var = btf__type_by_id(btf, vsi->type);
2929                 if (!t_var || !btf_is_var(t_var)) {
2930                         pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
2931                         return -EINVAL;
2932                 }
2933
2934                 var = btf_var(t_var);
2935                 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
2936                         continue;
2937
2938                 var_name = btf__name_by_offset(btf, t_var->name_off);
2939                 if (!var_name) {
2940                         pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
2941                                  sec_name, i);
2942                         return -ENOENT;
2943                 }
2944
2945                 sym = find_elf_var_sym(obj, var_name);
2946                 if (IS_ERR(sym)) {
2947                         pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
2948                                  sec_name, var_name);
2949                         return -ENOENT;
2950                 }
2951
2952                 if (fixup_offsets)
2953                         vsi->offset = sym->st_value;
2954
2955                 /* if variable is a global/weak symbol, but has restricted
2956                  * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
2957                  * as static. This follows similar logic for functions (BPF
2958                  * subprogs) and influences libbpf's further decisions about
2959                  * whether to make global data BPF array maps as
2960                  * BPF_F_MMAPABLE.
2961                  */
2962                 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
2963                     || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
2964                         var->linkage = BTF_VAR_STATIC;
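                /* For example (user-side BPF C, illustrative only):
                 *
                 *   __attribute__((visibility("hidden"))) int my_counter;
                 *
                 * stays a GLOBAL symbol in ELF but has its BTF VAR linkage
                 * downgraded to static here.
                 */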
2965         }
2966
2967 sort_vars:
2968         qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
2969         return 0;
2970 }
2971
2972 static int bpf_object_fixup_btf(struct bpf_object *obj)
2973 {
2974         int i, n, err = 0;
2975
2976         if (!obj->btf)
2977                 return 0;
2978
2979         n = btf__type_cnt(obj->btf);
2980         for (i = 1; i < n; i++) {
2981                 struct btf_type *t = btf_type_by_id(obj->btf, i);
2982
2983                 /* Loader needs to fix up some of the things compiler
2984                  * couldn't get its hands on while emitting BTF. This
2985                  * is section size and global variable offset. We use
2986                  * the info from the ELF itself for this purpose.
2987                  */
2988                 if (btf_is_datasec(t)) {
2989                         err = btf_fixup_datasec(obj, obj->btf, t);
2990                         if (err)
2991                                 return err;
2992                 }
2993         }
2994
2995         return 0;
2996 }
2997
2998 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
2999 {
3000         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3001             prog->type == BPF_PROG_TYPE_LSM)
3002                 return true;
3003
3004         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3005          * also need vmlinux BTF
3006          */
3007         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3008                 return true;
3009
3010         return false;
3011 }
3012
3013 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3014 {
3015         struct bpf_program *prog;
3016         int i;
3017
3018         /* CO-RE relocations need kernel BTF, but only when btf_custom_path
3019          * is not specified
3020          */
3021         if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3022                 return true;
3023
3024         /* Support for typed ksyms needs kernel BTF */
3025         for (i = 0; i < obj->nr_extern; i++) {
3026                 const struct extern_desc *ext;
3027
3028                 ext = &obj->externs[i];
3029                 if (ext->type == EXT_KSYM && ext->ksym.type_id)
3030                         return true;
3031         }
3032
3033         bpf_object__for_each_program(prog, obj) {
3034                 if (!prog->autoload)
3035                         continue;
3036                 if (prog_needs_vmlinux_btf(prog))
3037                         return true;
3038         }
3039
3040         return false;
3041 }
3042
3043 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3044 {
3045         int err;
3046
3047         /* btf_vmlinux could be loaded earlier */
3048         if (obj->btf_vmlinux || obj->gen_loader)
3049                 return 0;
3050
3051         if (!force && !obj_needs_vmlinux_btf(obj))
3052                 return 0;
3053
3054         obj->btf_vmlinux = btf__load_vmlinux_btf();
3055         err = libbpf_get_error(obj->btf_vmlinux);
3056         if (err) {
3057                 pr_warn("Error loading vmlinux BTF: %d\n", err);
3058                 obj->btf_vmlinux = NULL;
3059                 return err;
3060         }
3061         return 0;
3062 }
3063
3064 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3065 {
3066         struct btf *kern_btf = obj->btf;
3067         bool btf_mandatory, sanitize;
3068         int i, err = 0;
3069
3070         if (!obj->btf)
3071                 return 0;
3072
3073         if (!kernel_supports(obj, FEAT_BTF)) {
3074                 if (kernel_needs_btf(obj)) {
3075                         err = -EOPNOTSUPP;
3076                         goto report;
3077                 }
3078                 pr_debug("Kernel doesn't support BTF, skipping its upload.\n");
3079                 return 0;
3080         }
3081
3082         /* Even though some subprogs are global/weak, the user might prefer
3083          * the more permissive BPF verification process that the BPF verifier
3084          * performs for static functions, taking into account more context
3085          * from the caller functions. In such a case, they need to mark such
3086          * subprogs with __attribute__((visibility("hidden"))) and libbpf
3087          * will adjust the corresponding FUNC BTF type to be marked static,
3088          * triggering the more involved BPF verification process.
3089          */
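        /* As a sketch of the user-side annotation (BPF program code, not
         * libbpf; my_subprog and my_ctx are hypothetical names):
         *
         *   __attribute__((visibility("hidden")))
         *   int my_subprog(struct my_ctx *ctx) { ... }
         *
         * The loop below then flips the matching FUNC BTF type to
         * BTF_FUNC_STATIC.
         */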
3090         for (i = 0; i < obj->nr_programs; i++) {
3091                 struct bpf_program *prog = &obj->programs[i];
3092                 struct btf_type *t;
3093                 const char *name;
3094                 int j, n;
3095
3096                 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3097                         continue;
3098
3099                 n = btf__type_cnt(obj->btf);
3100                 for (j = 1; j < n; j++) {
3101                         t = btf_type_by_id(obj->btf, j);
3102                         if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3103                                 continue;
3104
3105                         name = btf__str_by_offset(obj->btf, t->name_off);
3106                         if (strcmp(name, prog->name) != 0)
3107                                 continue;
3108
3109                         t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3110                         break;
3111                 }
3112         }
3113
3114         sanitize = btf_needs_sanitization(obj);
3115         if (sanitize) {
3116                 const void *raw_data;
3117                 __u32 sz;
3118
3119                 /* clone BTF to sanitize a copy and leave the original intact */
3120                 raw_data = btf__raw_data(obj->btf, &sz);
3121                 kern_btf = btf__new(raw_data, sz);
3122                 err = libbpf_get_error(kern_btf);
3123                 if (err)
3124                         return err;
3125
3126                 /* enforce 8-byte pointers for BPF-targeted BTFs */
3127                 btf__set_pointer_size(obj->btf, 8);
3128                 err = bpf_object__sanitize_btf(obj, kern_btf);
3129                 if (err)
3130                         return err;
3131         }
3132
3133         if (obj->gen_loader) {
3134                 __u32 raw_size = 0;
3135                 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3136
3137                 if (!raw_data)
3138                         return -ENOMEM;
3139                 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3140                 /* Pretend to have a valid FD to pass various fd >= 0 checks.
3141                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3142                  */
3143                 btf__set_fd(kern_btf, 0);
3144         } else {
3145                 /* currently BPF_BTF_LOAD only supports log_level 1 */
3146                 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3147                                            obj->log_level ? 1 : 0);
3148         }
3149         if (sanitize) {
3150                 if (!err) {
3151                         /* move fd to libbpf's BTF */
3152                         btf__set_fd(obj->btf, btf__fd(kern_btf));
3153                         btf__set_fd(kern_btf, -1);
3154                 }
3155                 btf__free(kern_btf);
3156         }
3157 report:
3158         if (err) {
3159                 btf_mandatory = kernel_needs_btf(obj);
3160                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3161                         btf_mandatory ? "BTF is mandatory, can't proceed."
3162                                       : "BTF is optional, ignoring.");
3163                 if (!btf_mandatory)
3164                         err = 0;
3165         }
3166         return err;
3167 }
3168
3169 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3170 {
3171         const char *name;
3172
3173         name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3174         if (!name) {
3175                 pr_warn("elf: failed to get symbol name string at offset %zu from %s: %s\n",
3176                         off, obj->path, elf_errmsg(-1));
3177                 return NULL;
3178         }
3179
3180         return name;
3181 }
3182
3183 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3184 {
3185         const char *name;
3186
3187         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3188         if (!name) {
3189                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3190                         off, obj->path, elf_errmsg(-1));
3191                 return NULL;
3192         }
3193
3194         return name;
3195 }
3196
3197 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3198 {
3199         Elf_Scn *scn;
3200
3201         scn = elf_getscn(obj->efile.elf, idx);
3202         if (!scn) {
3203                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3204                         idx, obj->path, elf_errmsg(-1));
3205                 return NULL;
3206         }
3207         return scn;
3208 }
3209
3210 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3211 {
3212         Elf_Scn *scn = NULL;
3213         Elf *elf = obj->efile.elf;
3214         const char *sec_name;
3215
3216         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3217                 sec_name = elf_sec_name(obj, scn);
3218                 if (!sec_name)
3219                         return NULL;
3220
3221                 if (strcmp(sec_name, name) != 0)
3222                         continue;
3223
3224                 return scn;
3225         }
3226         return NULL;
3227 }
3228
3229 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3230 {
3231         Elf64_Shdr *shdr;
3232
3233         if (!scn)
3234                 return NULL;
3235
3236         shdr = elf64_getshdr(scn);
3237         if (!shdr) {
3238                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3239                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3240                 return NULL;
3241         }
3242
3243         return shdr;
3244 }
3245
3246 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3247 {
3248         const char *name;
3249         Elf64_Shdr *sh;
3250
3251         if (!scn)
3252                 return NULL;
3253
3254         sh = elf_sec_hdr(obj, scn);
3255         if (!sh)
3256                 return NULL;
3257
3258         name = elf_sec_str(obj, sh->sh_name);
3259         if (!name) {
3260                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3261                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3262                 return NULL;
3263         }
3264
3265         return name;
3266 }
3267
3268 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3269 {
3270         Elf_Data *data;
3271
3272         if (!scn)
3273                 return NULL;
3274
3275         data = elf_getdata(scn, 0);
3276         if (!data) {
3277                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3278                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3279                         obj->path, elf_errmsg(-1));
3280                 return NULL;
3281         }
3282
3283         return data;
3284 }
3285
3286 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3287 {
3288         if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3289                 return NULL;
3290
3291         return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3292 }
3293
3294 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3295 {
3296         if (idx >= data->d_size / sizeof(Elf64_Rel))
3297                 return NULL;
3298
3299         return (Elf64_Rel *)data->d_buf + idx;
3300 }
3301
3302 static bool is_sec_name_dwarf(const char *name)
3303 {
3304         /* approximation, but the actual list is too long */
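        /* e.g. .debug_info, .debug_abbrev, .debug_line, .debug_str, ... */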
3305         return str_has_pfx(name, ".debug_");
3306 }
3307
3308 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3309 {
3310         /* no special handling of .strtab */
3311         if (hdr->sh_type == SHT_STRTAB)
3312                 return true;
3313
3314         /* ignore .llvm_addrsig section as well */
3315         if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3316                 return true;
3317
3318         /* no subprograms will lead to an empty .text section, ignore it */
3319         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3320             strcmp(name, ".text") == 0)
3321                 return true;
3322
3323         /* DWARF sections */
3324         if (is_sec_name_dwarf(name))
3325                 return true;
3326
3327         if (str_has_pfx(name, ".rel")) {
3328                 name += sizeof(".rel") - 1;
3329                 /* DWARF section relocations */
3330                 if (is_sec_name_dwarf(name))
3331                         return true;
3332
3333                 /* .BTF and .BTF.ext don't need relocations */
3334                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3335                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
3336                         return true;
3337         }
3338
3339         return false;
3340 }
3341
3342 static int cmp_progs(const void *_a, const void *_b)
3343 {
3344         const struct bpf_program *a = _a;
3345         const struct bpf_program *b = _b;
3346
3347         if (a->sec_idx != b->sec_idx)
3348                 return a->sec_idx < b->sec_idx ? -1 : 1;
3349
3350         /* sec_insn_off can't be the same within the section */
3351         return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3352 }
3353
3354 static int bpf_object__elf_collect(struct bpf_object *obj)
3355 {
3356         struct elf_sec_desc *sec_desc;
3357         Elf *elf = obj->efile.elf;
3358         Elf_Data *btf_ext_data = NULL;
3359         Elf_Data *btf_data = NULL;
3360         int idx = 0, err = 0;
3361         const char *name;
3362         Elf_Data *data;
3363         Elf_Scn *scn;
3364         Elf64_Shdr *sh;
3365
3366         /* ELF section indices are 0-based, but sec #0 is a special "invalid"
3367          * section. Since section count retrieved by elf_getshdrnum() does
3368          * include sec #0, it is already the necessary size of an array to keep
3369          * all the sections.
3370          */
3371         if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3372                 pr_warn("elf: failed to get the number of sections for %s: %s\n",
3373                         obj->path, elf_errmsg(-1));
3374                 return -LIBBPF_ERRNO__FORMAT;
3375         }
3376         obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3377         if (!obj->efile.secs)
3378                 return -ENOMEM;
3379
3380         /* a bunch of ELF parsing functionality depends on processing symbols,
3381          * so do the first pass and find the symbol table
3382          */
3383         scn = NULL;
3384         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3385                 sh = elf_sec_hdr(obj, scn);
3386                 if (!sh)
3387                         return -LIBBPF_ERRNO__FORMAT;
3388
3389                 if (sh->sh_type == SHT_SYMTAB) {
3390                         if (obj->efile.symbols) {
3391                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3392                                 return -LIBBPF_ERRNO__FORMAT;
3393                         }
3394
3395                         data = elf_sec_data(obj, scn);
3396                         if (!data)
3397                                 return -LIBBPF_ERRNO__FORMAT;
3398
3399                         idx = elf_ndxscn(scn);
3400
3401                         obj->efile.symbols = data;
3402                         obj->efile.symbols_shndx = idx;
3403                         obj->efile.strtabidx = sh->sh_link;
3404                 }
3405         }
3406
3407         if (!obj->efile.symbols) {
3408                 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3409                         obj->path);
3410                 return -ENOENT;
3411         }
3412
3413         scn = NULL;
3414         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3415                 idx = elf_ndxscn(scn);
3416                 sec_desc = &obj->efile.secs[idx];
3417
3418                 sh = elf_sec_hdr(obj, scn);
3419                 if (!sh)
3420                         return -LIBBPF_ERRNO__FORMAT;
3421
3422                 name = elf_sec_str(obj, sh->sh_name);
3423                 if (!name)
3424                         return -LIBBPF_ERRNO__FORMAT;
3425
3426                 if (ignore_elf_section(sh, name))
3427                         continue;
3428
3429                 data = elf_sec_data(obj, scn);
3430                 if (!data)
3431                         return -LIBBPF_ERRNO__FORMAT;
3432
3433                 pr_debug("elf: section(%d) %s, size %lu, link %d, flags %lx, type=%d\n",
3434                          idx, name, (unsigned long)data->d_size,
3435                          (int)sh->sh_link, (unsigned long)sh->sh_flags,
3436                          (int)sh->sh_type);
3437
3438                 if (strcmp(name, "license") == 0) {
3439                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3440                         if (err)
3441                                 return err;
3442                 } else if (strcmp(name, "version") == 0) {
3443                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3444                         if (err)
3445                                 return err;
3446                 } else if (strcmp(name, "maps") == 0) {
3447                         pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3448                         return -ENOTSUP;
3449                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3450                         obj->efile.btf_maps_shndx = idx;
3451                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3452                         if (sh->sh_type != SHT_PROGBITS)
3453                                 return -LIBBPF_ERRNO__FORMAT;
3454                         btf_data = data;
3455                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3456                         if (sh->sh_type != SHT_PROGBITS)
3457                                 return -LIBBPF_ERRNO__FORMAT;
3458                         btf_ext_data = data;
3459                 } else if (sh->sh_type == SHT_SYMTAB) {
3460                         /* already processed during the first pass above */
3461                 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3462                         if (sh->sh_flags & SHF_EXECINSTR) {
3463                                 if (strcmp(name, ".text") == 0)
3464                                         obj->efile.text_shndx = idx;
3465                                 err = bpf_object__add_programs(obj, data, name, idx);
3466                                 if (err)
3467                                         return err;
3468                         } else if (strcmp(name, DATA_SEC) == 0 ||
3469                                    str_has_pfx(name, DATA_SEC ".")) {
3470                                 sec_desc->sec_type = SEC_DATA;
3471                                 sec_desc->shdr = sh;
3472                                 sec_desc->data = data;
3473                         } else if (strcmp(name, RODATA_SEC) == 0 ||
3474                                    str_has_pfx(name, RODATA_SEC ".")) {
3475                                 sec_desc->sec_type = SEC_RODATA;
3476                                 sec_desc->shdr = sh;
3477                                 sec_desc->data = data;
3478                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
3479                                 obj->efile.st_ops_data = data;
3480                                 obj->efile.st_ops_shndx = idx;
3481                         } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) {
3482                                 obj->efile.st_ops_link_data = data;
3483                                 obj->efile.st_ops_link_shndx = idx;
3484                         } else {
3485                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3486                                         idx, name);
3487                         }
3488                 } else if (sh->sh_type == SHT_REL) {
3489                         int targ_sec_idx = sh->sh_info; /* points to other section */
3490
3491                         if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3492                             targ_sec_idx >= obj->efile.sec_cnt)
3493                                 return -LIBBPF_ERRNO__FORMAT;
3494
3495                         /* Only do relo for section with exec instructions */
3496                         if (!section_have_execinstr(obj, targ_sec_idx) &&
3497                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3498                             strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3499                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
3500                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3501                                         idx, name, targ_sec_idx,
3502                                         elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3503                                 continue;
3504                         }
3505
3506                         sec_desc->sec_type = SEC_RELO;
3507                         sec_desc->shdr = sh;
3508                         sec_desc->data = data;
3509                 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
3510                                                          str_has_pfx(name, BSS_SEC "."))) {
3511                         sec_desc->sec_type = SEC_BSS;
3512                         sec_desc->shdr = sh;
3513                         sec_desc->data = data;
3514                 } else {
3515                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3516                                 (size_t)sh->sh_size);
3517                 }
3518         }
3519
3520         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3521                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3522                 return -LIBBPF_ERRNO__FORMAT;
3523         }
3524
3525         /* sort BPF programs by section index and in-section instruction offset
3526          * for faster search
3527          */
3528         if (obj->nr_programs)
3529                 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3530
3531         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3532 }
3533
3534 static bool sym_is_extern(const Elf64_Sym *sym)
3535 {
3536         int bind = ELF64_ST_BIND(sym->st_info);
3537         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3538         return sym->st_shndx == SHN_UNDEF &&
3539                (bind == STB_GLOBAL || bind == STB_WEAK) &&
3540                ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
3541 }
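
/* For example, typical BPF-side declarations (illustrative; __kconfig and
 * __ksym are the usual bpf_helpers.h section attributes):
 *
 *   extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *   extern void bpf_rcu_read_lock(void) __ksym;
 *
 * Both come out of Clang as NOTYPE GLOBAL (or WEAK) symbols in the UND
 * section, which is exactly what this predicate matches.
 */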
3542
3543 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
3544 {
3545         int bind = ELF64_ST_BIND(sym->st_info);
3546         int type = ELF64_ST_TYPE(sym->st_info);
3547
3548         /* in .text section */
3549         if (sym->st_shndx != text_shndx)
3550                 return false;
3551
3552         /* local function */
3553         if (bind == STB_LOCAL && type == STT_SECTION)
3554                 return true;
3555
3556         /* global function */
3557         return bind == STB_GLOBAL && type == STT_FUNC;
3558 }
3559
3560 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3561 {
3562         const struct btf_type *t;
3563         const char *tname;
3564         int i, n;
3565
3566         if (!btf)
3567                 return -ESRCH;
3568
3569         n = btf__type_cnt(btf);
3570         for (i = 1; i < n; i++) {
3571                 t = btf__type_by_id(btf, i);
3572
3573                 if (!btf_is_var(t) && !btf_is_func(t))
3574                         continue;
3575
3576                 tname = btf__name_by_offset(btf, t->name_off);
3577                 if (strcmp(tname, ext_name))
3578                         continue;
3579
3580                 if (btf_is_var(t) &&
3581                     btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3582                         return -EINVAL;
3583
3584                 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3585                         return -EINVAL;
3586
3587                 return i;
3588         }
3589
3590         return -ENOENT;
3591 }
3592
3593 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
3594         const struct btf_var_secinfo *vs;
3595         const struct btf_type *t;
3596         int i, j, n;
3597
3598         if (!btf)
3599                 return -ESRCH;
3600
3601         n = btf__type_cnt(btf);
3602         for (i = 1; i < n; i++) {
3603                 t = btf__type_by_id(btf, i);
3604
3605                 if (!btf_is_datasec(t))
3606                         continue;
3607
3608                 vs = btf_var_secinfos(t);
3609                 for (j = 0; j < btf_vlen(t); j++, vs++) {
3610                         if (vs->type == ext_btf_id)
3611                                 return i;
3612                 }
3613         }
3614
3615         return -ENOENT;
3616 }
3617
3618 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3619                                      bool *is_signed)
3620 {
3621         const struct btf_type *t;
3622         const char *name;
3623
3624         t = skip_mods_and_typedefs(btf, id, NULL);
3625         name = btf__name_by_offset(btf, t->name_off);
3626
3627         if (is_signed)
3628                 *is_signed = false;
3629         switch (btf_kind(t)) {
3630         case BTF_KIND_INT: {
3631                 int enc = btf_int_encoding(t);
3632
3633                 if (enc & BTF_INT_BOOL)
3634                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3635                 if (is_signed)
3636                         *is_signed = enc & BTF_INT_SIGNED;
3637                 if (t->size == 1)
3638                         return KCFG_CHAR;
3639                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3640                         return KCFG_UNKNOWN;
3641                 return KCFG_INT;
3642         }
3643         case BTF_KIND_ENUM:
3644                 if (t->size != 4)
3645                         return KCFG_UNKNOWN;
3646                 if (strcmp(name, "libbpf_tristate"))
3647                         return KCFG_UNKNOWN;
3648                 return KCFG_TRISTATE;
3649         case BTF_KIND_ENUM64:
3650                 if (strcmp(name, "libbpf_tristate"))
3651                         return KCFG_UNKNOWN;
3652                 return KCFG_TRISTATE;
3653         case BTF_KIND_ARRAY:
3654                 if (btf_array(t)->nelems == 0)
3655                         return KCFG_UNKNOWN;
3656                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3657                         return KCFG_UNKNOWN;
3658                 return KCFG_CHAR_ARR;
3659         default:
3660                 return KCFG_UNKNOWN;
3661         }
3662 }
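
/* An illustrative mapping from BPF-side declarations to guessed kcfg types
 * (config names here are hypothetical; libbpf_tristate is the real enum):
 *
 *   extern int CONFIG_FOO __kconfig;                  -> KCFG_INT
 *   extern bool CONFIG_BAR __kconfig;                 -> KCFG_BOOL
 *   extern char CONFIG_BAZ[8] __kconfig;              -> KCFG_CHAR_ARR
 *   extern enum libbpf_tristate CONFIG_QUX __kconfig; -> KCFG_TRISTATE
 */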
3663
3664 static int cmp_externs(const void *_a, const void *_b)
3665 {
3666         const struct extern_desc *a = _a;
3667         const struct extern_desc *b = _b;
3668
3669         if (a->type != b->type)
3670                 return a->type < b->type ? -1 : 1;
3671
3672         if (a->type == EXT_KCFG) {
3673                 /* descending order by alignment requirements */
3674                 if (a->kcfg.align != b->kcfg.align)
3675                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
3676                 /* ascending order by size, within same alignment class */
3677                 if (a->kcfg.sz != b->kcfg.sz)
3678                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3679         }
3680
3681         /* resolve ties by name */
3682         return strcmp(a->name, b->name);
3683 }
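
/* E.g. (made-up kcfg externs): { A: align=8 sz=8, B: align=4 sz=4,
 * C: align=4 sz=1 } sorts as A, C, B -- descending alignment first,
 * then ascending size, then name, which keeps .kconfig packing tight.
 */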
3684
3685 static int find_int_btf_id(const struct btf *btf)
3686 {
3687         const struct btf_type *t;
3688         int i, n;
3689
3690         n = btf__type_cnt(btf);
3691         for (i = 1; i < n; i++) {
3692                 t = btf__type_by_id(btf, i);
3693
3694                 if (btf_is_int(t) && btf_int_bits(t) == 32)
3695                         return i;
3696         }
3697
3698         return 0;
3699 }
3700
3701 static int add_dummy_ksym_var(struct btf *btf)
3702 {
3703         int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3704         const struct btf_var_secinfo *vs;
3705         const struct btf_type *sec;
3706
3707         if (!btf)
3708                 return 0;
3709
3710         sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3711                                             BTF_KIND_DATASEC);
3712         if (sec_btf_id < 0)
3713                 return 0;
3714
3715         sec = btf__type_by_id(btf, sec_btf_id);
3716         vs = btf_var_secinfos(sec);
3717         for (i = 0; i < btf_vlen(sec); i++, vs++) {
3718                 const struct btf_type *vt;
3719
3720                 vt = btf__type_by_id(btf, vs->type);
3721                 if (btf_is_func(vt))
3722                         break;
3723         }
3724
3725         /* No func in ksyms sec.  No need to add dummy var. */
3726         if (i == btf_vlen(sec))
3727                 return 0;
3728
3729         int_btf_id = find_int_btf_id(btf);
3730         dummy_var_btf_id = btf__add_var(btf,
3731                                         "dummy_ksym",
3732                                         BTF_VAR_GLOBAL_ALLOCATED,
3733                                         int_btf_id);
3734         if (dummy_var_btf_id < 0)
3735                 pr_warn("cannot create a dummy_ksym var\n");
3736
3737         return dummy_var_btf_id;
3738 }
3739
3740 static int bpf_object__collect_externs(struct bpf_object *obj)
3741 {
3742         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3743         const struct btf_type *t;
3744         struct extern_desc *ext;
3745         int i, n, off, dummy_var_btf_id;
3746         const char *ext_name, *sec_name;
3747         Elf_Scn *scn;
3748         Elf64_Shdr *sh;
3749
3750         if (!obj->efile.symbols)
3751                 return 0;
3752
3753         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3754         sh = elf_sec_hdr(obj, scn);
3755         if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
3756                 return -LIBBPF_ERRNO__FORMAT;
3757
3758         dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3759         if (dummy_var_btf_id < 0)
3760                 return dummy_var_btf_id;
3761
3762         n = sh->sh_size / sh->sh_entsize;
3763         pr_debug("looking for externs among %d symbols...\n", n);
3764
3765         for (i = 0; i < n; i++) {
3766                 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
3767
3768                 if (!sym)
3769                         return -LIBBPF_ERRNO__FORMAT;
3770                 if (!sym_is_extern(sym))
3771                         continue;
3772                 ext_name = elf_sym_str(obj, sym->st_name);
3773                 if (!ext_name || !ext_name[0])
3774                         continue;
3775
3776                 ext = obj->externs;
3777                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3778                 if (!ext)
3779                         return -ENOMEM;
3780                 obj->externs = ext;
3781                 ext = &ext[obj->nr_extern];
3782                 memset(ext, 0, sizeof(*ext));
3783                 obj->nr_extern++;
3784
3785                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3786                 if (ext->btf_id <= 0) {
3787                         pr_warn("failed to find BTF for extern '%s': %d\n",
3788                                 ext_name, ext->btf_id);
3789                         return ext->btf_id;
3790                 }
3791                 t = btf__type_by_id(obj->btf, ext->btf_id);
3792                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3793                 ext->sym_idx = i;
3794                 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
3795
3796                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3797                 if (ext->sec_btf_id <= 0) {
3798                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3799                                 ext_name, ext->btf_id, ext->sec_btf_id);
3800                         return ext->sec_btf_id;
3801                 }
3802                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3803                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3804
3805                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3806                         if (btf_is_func(t)) {
3807                                 pr_warn("extern function %s is unsupported under %s section\n",
3808                                         ext->name, KCONFIG_SEC);
3809                                 return -ENOTSUP;
3810                         }
3811                         kcfg_sec = sec;
3812                         ext->type = EXT_KCFG;
3813                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3814                         if (ext->kcfg.sz <= 0) {
3815                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3816                                         ext_name, ext->kcfg.sz);
3817                                 return ext->kcfg.sz;
3818                         }
3819                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3820                         if (ext->kcfg.align <= 0) {
3821                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3822                                         ext_name, ext->kcfg.align);
3823                                 return -EINVAL;
3824                         }
3825                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3826                                                         &ext->kcfg.is_signed);
3827                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3828                                 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
3829                                 return -ENOTSUP;
3830                         }
3831                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3832                         ksym_sec = sec;
3833                         ext->type = EXT_KSYM;
3834                         skip_mods_and_typedefs(obj->btf, t->type,
3835                                                &ext->ksym.type_id);
3836                 } else {
3837                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3838                         return -ENOTSUP;
3839                 }
3840         }
3841         pr_debug("collected %d externs total\n", obj->nr_extern);
3842
3843         if (!obj->nr_extern)
3844                 return 0;
3845
3846         /* sort externs by type, for kcfg ones also by (align, size, name) */
3847         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3848
3849         /* for .ksyms section, we need to turn all externs into allocated
3850          * variables in BTF to pass kernel verification; we do this by
3851          * pretending that each extern is a 4-byte int variable
3852          */
3853         if (ksym_sec) {
3854                 /* find existing 4-byte integer type in BTF to use for fake
3855                  * extern variables in DATASEC
3856                  */
3857                 int int_btf_id = find_int_btf_id(obj->btf);
3858                 /* For an extern function, the dummy_var added
3859                  * earlier is used to replace vs->type, and its
3860                  * name string is reused to fill in any missing
3861                  * parameter names.
3862                  */
3863                 const struct btf_type *dummy_var;
3864
3865                 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
3866                 for (i = 0; i < obj->nr_extern; i++) {
3867                         ext = &obj->externs[i];
3868                         if (ext->type != EXT_KSYM)
3869                                 continue;
3870                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3871                                  i, ext->sym_idx, ext->name);
3872                 }
3873
3874                 sec = ksym_sec;
3875                 n = btf_vlen(sec);
3876                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3877                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3878                         struct btf_type *vt;
3879
3880                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3881                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3882                         ext = find_extern_by_name(obj, ext_name);
3883                         if (!ext) {
3884                                 pr_warn("failed to find extern definition for BTF %s '%s'\n",
3885                                         btf_kind_str(vt), ext_name);
3886                                 return -ESRCH;
3887                         }
3888                         if (btf_is_func(vt)) {
3889                                 const struct btf_type *func_proto;
3890                                 struct btf_param *param;
3891                                 int j;
3892
3893                                 func_proto = btf__type_by_id(obj->btf,
3894                                                              vt->type);
3895                                 param = btf_params(func_proto);
3896                                 /* Reuse the dummy_var string if the
3897                                  * func proto does not have a param name.
3898                                  */
3899                                 for (j = 0; j < btf_vlen(func_proto); j++)
3900                                         if (param[j].type && !param[j].name_off)
3901                                                 param[j].name_off =
3902                                                         dummy_var->name_off;
3903                                 vs->type = dummy_var_btf_id;
3904                                 vt->info &= ~0xffff;
3905                                 vt->info |= BTF_FUNC_GLOBAL;
3906                         } else {
3907                                 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3908                                 vt->type = int_btf_id;
3909                         }
3910                         vs->offset = off;
3911                         vs->size = sizeof(int);
3912                 }
3913                 sec->size = off;
3914         }
3915
3916         if (kcfg_sec) {
3917                 sec = kcfg_sec;
3918                 /* for kcfg externs calculate their offsets within a .kconfig map */
3919                 off = 0;
3920                 for (i = 0; i < obj->nr_extern; i++) {
3921                         ext = &obj->externs[i];
3922                         if (ext->type != EXT_KCFG)
3923                                 continue;
3924
3925                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
3926                         off = ext->kcfg.data_off + ext->kcfg.sz;
3927                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
3928                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
3929                 }
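                /* Worked example with made-up externs: (align, sz) pairs of
                 * (8, 8), (4, 4) and (1, 3) in sorted order get data_off 0,
                 * 8 and 12, leaving off (and thus sec->size below) at 15.
                 */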
3930                 sec->size = off;
3931                 n = btf_vlen(sec);
3932                 for (i = 0; i < n; i++) {
3933                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3934
3935                         t = btf__type_by_id(obj->btf, vs->type);
3936                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
3937                         ext = find_extern_by_name(obj, ext_name);
3938                         if (!ext) {
3939                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
3940                                         ext_name);
3941                                 return -ESRCH;
3942                         }
3943                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
3944                         vs->offset = ext->kcfg.data_off;
3945                 }
3946         }
3947         return 0;
3948 }
3949
3950 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
3951 {
3952         return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
3953 }
3954
3955 struct bpf_program *
3956 bpf_object__find_program_by_name(const struct bpf_object *obj,
3957                                  const char *name)
3958 {
3959         struct bpf_program *prog;
3960
3961         bpf_object__for_each_program(prog, obj) {
3962                 if (prog_is_subprog(obj, prog))
3963                         continue;
3964                 if (!strcmp(prog->name, name))
3965                         return prog;
3966         }
3967         return errno = ENOENT, NULL;
3968 }
3969
3970 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
3971                                       int shndx)
3972 {
3973         switch (obj->efile.secs[shndx].sec_type) {
3974         case SEC_BSS:
3975         case SEC_DATA:
3976         case SEC_RODATA:
3977                 return true;
3978         default:
3979                 return false;
3980         }
3981 }
3982
3983 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
3984                                       int shndx)
3985 {
3986         return shndx == obj->efile.btf_maps_shndx;
3987 }
3988
3989 static enum libbpf_map_type
3990 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
3991 {
3992         if (shndx == obj->efile.symbols_shndx)
3993                 return LIBBPF_MAP_KCONFIG;
3994
3995         switch (obj->efile.secs[shndx].sec_type) {
3996         case SEC_BSS:
3997                 return LIBBPF_MAP_BSS;
3998         case SEC_DATA:
3999                 return LIBBPF_MAP_DATA;
4000         case SEC_RODATA:
4001                 return LIBBPF_MAP_RODATA;
4002         default:
4003                 return LIBBPF_MAP_UNSPEC;
4004         }
4005 }
4006
4007 static int bpf_program__record_reloc(struct bpf_program *prog,
4008                                      struct reloc_desc *reloc_desc,
4009                                      __u32 insn_idx, const char *sym_name,
4010                                      const Elf64_Sym *sym, const Elf64_Rel *rel)
4011 {
4012         struct bpf_insn *insn = &prog->insns[insn_idx];
4013         size_t map_idx, nr_maps = prog->obj->nr_maps;
4014         struct bpf_object *obj = prog->obj;
4015         __u32 shdr_idx = sym->st_shndx;
4016         enum libbpf_map_type type;
4017         const char *sym_sec_name;
4018         struct bpf_map *map;
4019
4020         if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4021                 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4022                         prog->name, sym_name, insn_idx, insn->code);
4023                 return -LIBBPF_ERRNO__RELOC;
4024         }
4025
4026         if (sym_is_extern(sym)) {
4027                 int sym_idx = ELF64_R_SYM(rel->r_info);
4028                 int i, n = obj->nr_extern;
4029                 struct extern_desc *ext;
4030
4031                 for (i = 0; i < n; i++) {
4032                         ext = &obj->externs[i];
4033                         if (ext->sym_idx == sym_idx)
4034                                 break;
4035                 }
4036                 if (i >= n) {
4037                         pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4038                                 prog->name, sym_name, sym_idx);
4039                         return -LIBBPF_ERRNO__RELOC;
4040                 }
4041                 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4042                          prog->name, i, ext->name, ext->sym_idx, insn_idx);
4043                 if (insn->code == (BPF_JMP | BPF_CALL))
4044                         reloc_desc->type = RELO_EXTERN_CALL;
4045                 else
4046                         reloc_desc->type = RELO_EXTERN_LD64;
4047                 reloc_desc->insn_idx = insn_idx;
4048                 reloc_desc->ext_idx = i;
4049                 return 0;
4050         }
4051
4052         /* sub-program call relocation */
4053         if (is_call_insn(insn)) {
4054                 if (insn->src_reg != BPF_PSEUDO_CALL) {
4055                         pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4056                         return -LIBBPF_ERRNO__RELOC;
4057                 }
4058                 /* text_shndx can be 0, if no default "main" program exists */
                if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
                        sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
                        pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
                                prog->name, sym_name, sym_sec_name);
                        return -LIBBPF_ERRNO__RELOC;
                }
                if (sym->st_value % BPF_INSN_SZ) {
                        pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
                                prog->name, sym_name, (size_t)sym->st_value);
                        return -LIBBPF_ERRNO__RELOC;
                }
                reloc_desc->type = RELO_CALL;
                reloc_desc->insn_idx = insn_idx;
                reloc_desc->sym_off = sym->st_value;
                return 0;
        }

        if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
                pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
                        prog->name, sym_name, shdr_idx);
                return -LIBBPF_ERRNO__RELOC;
        }

        /* loading subprog addresses */
        if (sym_is_subprog(sym, obj->efile.text_shndx)) {
                /* global_func: sym->st_value = offset in the section, insn->imm = 0.
                 * local_func: sym->st_value = 0, insn->imm = offset in the section.
                 */
                if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
                        pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
                                prog->name, sym_name, (size_t)sym->st_value, insn->imm);
                        return -LIBBPF_ERRNO__RELOC;
                }

                reloc_desc->type = RELO_SUBPROG_ADDR;
                reloc_desc->insn_idx = insn_idx;
                reloc_desc->sym_off = sym->st_value;
                return 0;
        }

        type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
        sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));

        /* generic map reference relocation */
        if (type == LIBBPF_MAP_UNSPEC) {
                if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
                        pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
                                prog->name, sym_name, sym_sec_name);
                        return -LIBBPF_ERRNO__RELOC;
                }
                for (map_idx = 0; map_idx < nr_maps; map_idx++) {
                        map = &obj->maps[map_idx];
                        if (map->libbpf_type != type ||
                            map->sec_idx != sym->st_shndx ||
                            map->sec_offset != sym->st_value)
                                continue;
                        pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
                                 prog->name, map_idx, map->name, map->sec_idx,
                                 map->sec_offset, insn_idx);
                        break;
                }
                if (map_idx >= nr_maps) {
                        pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
                                prog->name, sym_sec_name, (size_t)sym->st_value);
                        return -LIBBPF_ERRNO__RELOC;
                }
                reloc_desc->type = RELO_LD64;
                reloc_desc->insn_idx = insn_idx;
                reloc_desc->map_idx = map_idx;
                reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
                return 0;
        }

        /* global data map relocation */
        if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
                pr_warn("prog '%s': bad data relo against section '%s'\n",
                        prog->name, sym_sec_name);
                return -LIBBPF_ERRNO__RELOC;
        }
        for (map_idx = 0; map_idx < nr_maps; map_idx++) {
                map = &obj->maps[map_idx];
                if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
                        continue;
                pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
                         prog->name, map_idx, map->name, map->sec_idx,
                         map->sec_offset, insn_idx);
                break;
        }
        if (map_idx >= nr_maps) {
                pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
                        prog->name, sym_sec_name);
                return -LIBBPF_ERRNO__RELOC;
        }

        reloc_desc->type = RELO_DATA;
        reloc_desc->insn_idx = insn_idx;
        reloc_desc->map_idx = map_idx;
        reloc_desc->sym_off = sym->st_value;
        return 0;
}

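/* Check whether a section-level instruction index falls within prog's span
 * of its containing ELF section.
 */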
static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
{
        return insn_idx >= prog->sec_insn_off &&
               insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
}

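/* Binary-search obj->programs, which are laid out in increasing
 * (section index, instruction offset) order, for the program containing
 * the given instruction.
 */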
static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
                                                 size_t sec_idx, size_t insn_idx)
{
        int l = 0, r = obj->nr_programs - 1, m;
        struct bpf_program *prog;

        if (!obj->nr_programs)
                return NULL;

        while (l < r) {
                m = l + (r - l + 1) / 2;
                prog = &obj->programs[m];

                if (prog->sec_idx < sec_idx ||
                    (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
                        l = m;
                else
                        r = m - 1;
        }
        /* matching program could be at index l, but it still might be the
         * wrong one, so double-check the conditions one last time
         */
        prog = &obj->programs[l];
        if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
                return prog;
        return NULL;
}

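/* Walk all relocation records of one .rel section, resolve each record to
 * the BPF program containing the relocated instruction and append a
 * reloc_desc to that program.
 */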
static int
bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
{
        const char *relo_sec_name, *sec_name;
        size_t sec_idx = shdr->sh_info, sym_idx;
        struct bpf_program *prog;
        struct reloc_desc *relos;
        int err, i, nrels;
        const char *sym_name;
        __u32 insn_idx;
        Elf_Scn *scn;
        Elf_Data *scn_data;
        Elf64_Sym *sym;
        Elf64_Rel *rel;

        if (sec_idx >= obj->efile.sec_cnt)
                return -EINVAL;

        scn = elf_sec_by_idx(obj, sec_idx);
        scn_data = elf_sec_data(obj, scn);

        relo_sec_name = elf_sec_str(obj, shdr->sh_name);
        sec_name = elf_sec_name(obj, scn);
        if (!relo_sec_name || !sec_name)
                return -EINVAL;

        pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
                 relo_sec_name, sec_idx, sec_name);
        nrels = shdr->sh_size / shdr->sh_entsize;

        for (i = 0; i < nrels; i++) {
                rel = elf_rel_by_idx(data, i);
                if (!rel) {
                        pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                sym_idx = ELF64_R_SYM(rel->r_info);
                sym = elf_sym_by_idx(obj, sym_idx);
                if (!sym) {
                        pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
                                relo_sec_name, sym_idx, i);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sym->st_shndx >= obj->efile.sec_cnt) {
                        pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
                                relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
                        pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
                                relo_sec_name, (size_t)rel->r_offset, i);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                insn_idx = rel->r_offset / BPF_INSN_SZ;
                /* relocations against static functions are recorded as
                 * relocations against the section that contains a function;
                 * in such a case, the symbol will be STT_SECTION and
                 * sym.st_name will point to the empty string (0), so fetch
                 * the section name instead
                 */
                if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
                        sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
                else
                        sym_name = elf_sym_str(obj, sym->st_name);
                sym_name = sym_name ?: "<?";

                pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
                         relo_sec_name, i, insn_idx, sym_name);

                prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
                if (!prog) {
                        pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
                                relo_sec_name, i, sec_name, insn_idx);
                        continue;
                }

                relos = libbpf_reallocarray(prog->reloc_desc,
                                            prog->nr_reloc + 1, sizeof(*relos));
                if (!relos)
                        return -ENOMEM;
                prog->reloc_desc = relos;

                /* adjust insn_idx to local BPF program frame of reference */
                insn_idx -= prog->sec_insn_off;
                err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
                                                insn_idx, sym_name, sym, rel);
                if (err)
                        return err;

                prog->nr_reloc++;
        }
        return 0;
}

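/* For internal (global data) maps, look up the map's DATASEC type in obj's
 * BTF and record it as the value type; BTF-defined and struct_ops maps
 * don't need this lookup.
 */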
static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
{
        int id;

        if (!obj->btf)
                return -ENOENT;

        /* if it's a BTF-defined map, we don't need to search for type IDs.
         * A struct_ops map needs neither btf_key_type_id nor
         * btf_value_type_id.
         */
        if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
                return 0;

        /*
         * LLVM annotates global data differently in BTF, that is,
         * only as '.data', '.bss' or '.rodata'.
         */
        if (!bpf_map__is_internal(map))
                return -ENOENT;

        id = btf__find_by_name(obj->btf, map->real_name);
        if (id < 0)
                return id;

        map->btf_key_type_id = 0;
        map->btf_value_type_id = id;
        return 0;
}

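/* Fallback for kernels lacking BPF_OBJ_GET_INFO_BY_FD: recover basic map
 * parameters by parsing /proc/<pid>/fdinfo/<fd>.
 */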
static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
{
        char file[PATH_MAX], buff[4096];
        FILE *fp;
        __u32 val;
        int err;

        snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
        memset(info, 0, sizeof(*info));

        fp = fopen(file, "r");
        if (!fp) {
                err = -errno;
                pr_warn("failed to open %s: %d. No procfs support?\n", file,
                        err);
                return err;
        }

        while (fgets(buff, sizeof(buff), fp)) {
                if (sscanf(buff, "map_type:\t%u", &val) == 1)
                        info->type = val;
                else if (sscanf(buff, "key_size:\t%u", &val) == 1)
                        info->key_size = val;
                else if (sscanf(buff, "value_size:\t%u", &val) == 1)
                        info->value_size = val;
                else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
                        info->max_entries = val;
                else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
                        info->map_flags = val;
        }

        fclose(fp);

        return 0;
}

bool bpf_map__autocreate(const struct bpf_map *map)
{
        return map->autocreate;
}

int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
{
        if (map->obj->loaded)
                return libbpf_err(-EBUSY);

        map->autocreate = autocreate;
        return 0;
}

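/* Adopt an externally provided map FD in place of creating a new map. The
 * FD is dup3()'ed onto a freshly reserved O_CLOEXEC descriptor (the
 * throwaway open("/") merely reserves a descriptor number) and the map
 * definition is overwritten with the parameters the kernel reports.
 *
 * Example (illustrative; the pin path is hypothetical):
 *
 *      int fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *
 *      if (fd >= 0)
 *              err = bpf_map__reuse_fd(map, fd);
 */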
int bpf_map__reuse_fd(struct bpf_map *map, int fd)
{
        struct bpf_map_info info;
        __u32 len = sizeof(info), name_len;
        int new_fd, err;
        char *new_name;

        memset(&info, 0, len);
        err = bpf_map_get_info_by_fd(fd, &info, &len);
        if (err && errno == EINVAL)
                err = bpf_get_map_info_from_fdinfo(fd, &info);
        if (err)
                return libbpf_err(err);

        name_len = strlen(info.name);
        if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
                new_name = strdup(map->name);
        else
                new_name = strdup(info.name);

        if (!new_name)
                return libbpf_err(-errno);

        new_fd = open("/", O_RDONLY | O_CLOEXEC);
        if (new_fd < 0) {
                err = -errno;
                goto err_free_new_name;
        }

        new_fd = dup3(fd, new_fd, O_CLOEXEC);
        if (new_fd < 0) {
                err = -errno;
                goto err_close_new_fd;
        }

        err = zclose(map->fd);
        if (err) {
                err = -errno;
                goto err_close_new_fd;
        }
        free(map->name);

        map->fd = new_fd;
        map->name = new_name;
        map->def.type = info.type;
        map->def.key_size = info.key_size;
        map->def.value_size = info.value_size;
        map->def.max_entries = info.max_entries;
        map->def.map_flags = info.map_flags;
        map->btf_key_type_id = info.btf_key_type_id;
        map->btf_value_type_id = info.btf_value_type_id;
        map->reused = true;
        map->map_extra = info.map_extra;

        return 0;

err_close_new_fd:
        close(new_fd);
err_free_new_name:
        free(new_name);
        return libbpf_err(err);
}

__u32 bpf_map__max_entries(const struct bpf_map *map)
{
        return map->def.max_entries;
}

struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
{
        if (!bpf_map_type__is_map_in_map(map->def.type))
                return errno = EINVAL, NULL;

        return map->inner_map;
}

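/* Example (illustrative): a BPF ringbuf map named 'rb' could be resized
 * before load, assuming such a map exists in the object:
 *
 *      struct bpf_map *rb = bpf_object__find_map_by_name(obj, "rb");
 *
 *      if (rb)
 *              err = bpf_map__set_max_entries(rb, 256 * 1024);
 *
 * For ringbufs, the requested size is auto-adjusted to a multiple of the
 * page size by the setter below.
 */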
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
        if (map->obj->loaded)
                return libbpf_err(-EBUSY);

        map->def.max_entries = max_entries;

        /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
        if (map_is_ringbuf(map))
                map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);

        return 0;
}

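/* Sanity-check that BPF programs can be loaded at all: load a trivial
 * "r0 = 0; exit" program, bumping RLIMIT_MEMLOCK first for older kernels
 * that charge BPF objects against it.
 */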
static int
bpf_object__probe_loading(struct bpf_object *obj)
{
        char *cp, errmsg[STRERR_BUFSIZE];
        struct bpf_insn insns[] = {
                BPF_MOV64_IMM(BPF_REG_0, 0),
                BPF_EXIT_INSN(),
        };
        int ret, insn_cnt = ARRAY_SIZE(insns);

        if (obj->gen_loader)
                return 0;

        ret = bump_rlimit_memlock();
        if (ret)
                pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);

        /* make sure basic loading works */
        ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
        if (ret < 0)
                ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
        if (ret < 0) {
                ret = errno;
                cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
                pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
                        "program. Make sure your kernel supports BPF "
                        "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
                        "set to a big enough value.\n", __func__, cp, ret);
                return -ret;
        }
        close(ret);

        return 0;
}

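/* Reduce a probe's fd-or-error result to 0/1, closing the fd if it's valid. */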
static int probe_fd(int fd)
{
        if (fd >= 0)
                close(fd);
        return fd >= 0;
}

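/* Each probe_kern_*() below feeds the kernel a minimal program, map or raw
 * BTF blob and reports whether the kernel accepted it, so individual
 * features can be detected at runtime.
 */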
static int probe_kern_prog_name(void)
{
        const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
        struct bpf_insn insns[] = {
                BPF_MOV64_IMM(BPF_REG_0, 0),
                BPF_EXIT_INSN(),
        };
        union bpf_attr attr;
        int ret;

        memset(&attr, 0, attr_sz);
        attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
        attr.license = ptr_to_u64("GPL");
        attr.insns = ptr_to_u64(insns);
        attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
        libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));

        /* make sure loading with name works */
        ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
        return probe_fd(ret);
}

static int probe_kern_global_data(void)
{
        char *cp, errmsg[STRERR_BUFSIZE];
        struct bpf_insn insns[] = {
                BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
                BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
                BPF_MOV64_IMM(BPF_REG_0, 0),
                BPF_EXIT_INSN(),
        };
        int ret, map, insn_cnt = ARRAY_SIZE(insns);

        map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
        if (map < 0) {
                ret = -errno;
                cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
                pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
                        __func__, cp, -ret);
                return ret;
        }

        insns[0].imm = map;

        ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
        close(map);
        return probe_fd(ret);
}

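/* The probe_kern_btf*() probes below hand-craft raw BTF blobs exercising
 * one BTF kind or encoding each.
 */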
static int probe_kern_btf(void)
{
        static const char strs[] = "\0int";
        __u32 types[] = {
                /* int */
                BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
        };

        return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
                                             strs, sizeof(strs)));
}

static int probe_kern_btf_func(void)
{
        static const char strs[] = "\0int\0x\0a";
        /* void x(int a) {} */
        __u32 types[] = {
                /* int */
                BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
                /* FUNC_PROTO */                                /* [2] */
                BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
                BTF_PARAM_ENC(7, 1),
                /* FUNC x */                                    /* [3] */
                BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
        };

        return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
                                             strs, sizeof(strs)));
}

static int probe_kern_btf_func_global(void)
{
        static const char strs[] = "\0int\0x\0a";
        /* void x(int a) {}, encoded with global linkage */
        __u32 types[] = {
                /* int */
                BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
                /* FUNC_PROTO */                                /* [2] */
                BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
                BTF_PARAM_ENC(7, 1),
                /* FUNC x BTF_FUNC_GLOBAL */                    /* [3] */
                BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2),
        };

        return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
                                             strs, sizeof(strs)));
}

static int probe_kern_btf_datasec(void)
{
        static const char strs[] = "\0x\0.data";
        /* static int x; */
        __u32 types[] = {
                /* int */
                BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
                /* VAR x */                                     /* [2] */
                BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
                BTF_VAR_STATIC,
                /* DATASEC val */                               /* [3] */
                BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
                BTF_VAR_SECINFO_ENC(2, 0, 4),
        };

        return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
                                             strs, sizeof(strs)));
}

static int probe_kern_btf_float(void)
{
        static const char strs[] = "\0float";
        __u32 types[] = {
                /* float */
                BTF_TYPE_FLOAT_ENC(1, 4),
        };

        return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
                                             strs, sizeof(strs)));
}

static int probe_kern_btf_decl_tag(void)
{
        static const char strs[] = "\0tag";
        __u32 types[] = {
                /* int */
                BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
                /* VAR 'tag' */                                 /* [2] */
                BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
                BTF_VAR_STATIC,
                /* DECL_TAG 'tag' attached to VAR [2] */
                BTF_TYPE_DECL_TAG_ENC(1, 2, -1),
        };

        return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
                                             strs, sizeof(strs)));
}

static int probe_kern_btf_type_tag(void)
{
        static const char strs[] = "\0tag";
        __u32 types[] = {
                /* int */
                BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),          /* [1] */
                /* TYPE_TAG 'tag' */
                BTF_TYPE_TYPE_TAG_ENC(1, 1),                            /* [2] */
                /* ptr */
                BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),   /* [3] */
        };

        return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
                                             strs, sizeof(strs)));
}

static int probe_kern_array_mmap(void)
{
        LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
        int fd;

        fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
        return probe_fd(fd);
}

static int probe_kern_exp_attach_type(void)
{
        LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);
        struct bpf_insn insns[] = {
                BPF_MOV64_IMM(BPF_REG_0, 0),
                BPF_EXIT_INSN(),
        };
        int fd, insn_cnt = ARRAY_SIZE(insns);

        /* use any valid combination of program type and (optional)
         * non-zero expected attach type (i.e., not BPF_CGROUP_INET_INGRESS,
         * which is 0) to see if the kernel supports the expected_attach_type
         * field for the BPF_PROG_LOAD command
         */
        fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts);
        return probe_fd(fd);
}

static int probe_kern_probe_read_kernel(void)
{
        struct bpf_insn insns[] = {
                BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),   /* r1 = r10 (fp) */
                BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),  /* r1 += -8 */
                BPF_MOV64_IMM(BPF_REG_2, 8),            /* r2 = 8 */
                BPF_MOV64_IMM(BPF_REG_3, 0),            /* r3 = 0 */
                BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),
                BPF_EXIT_INSN(),
        };
        int fd, insn_cnt = ARRAY_SIZE(insns);

        fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);
        return probe_fd(fd);
}

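/* Probe BPF_PROG_BIND_MAP support by binding a scratch array map to a
 * trivial program; inability to load the program is treated as the
 * feature being missing.
 */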
static int probe_prog_bind_map(void)
{
        char *cp, errmsg[STRERR_BUFSIZE];
        struct bpf_insn insns[] = {
                BPF_MOV64_IMM(BPF_REG_0, 0),
                BPF_EXIT_INSN(),
        };
        int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);

        map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
        if (map < 0) {
                ret = -errno;
                cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
                pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
                        __func__, cp, -ret);
                return ret;
        }

        prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);
        if (prog < 0) {
                close(map);
                return 0;
        }

        ret = bpf_prog_bind_map(prog, map, NULL);

        close(map);
        close(prog);

        return ret >= 0;
}

static int probe_module_btf(void)
{
        static const char strs[] = "\0int";
        __u32 types[] = {
                /* int */
                BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),
        };
        struct bpf_btf_info info;
        __u32 len = sizeof(info);
        char name[16];
        int fd, err;

        fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs));
        if (fd < 0)
                return 0; /* BTF not supported at all */

        memset(&info, 0, sizeof(info));
        info.name = ptr_to_u64(name);
        info.name_len = sizeof(name);

        /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer;
         * kernel's module BTF support coincides with support for
         * name/name_len fields in struct bpf_btf_info.
         */
        err = bpf_btf_get_info_by_fd(fd, &info, &len);
        close(fd);
        return !err;
}

static int probe_perf_link(void)
{
        struct bpf_insn insns[] = {
                BPF_MOV64_IMM(BPF_REG_0, 0),
                BPF_EXIT_INSN(),
        };
        int prog_fd, link_fd, err;

        prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL",
                                insns, ARRAY_SIZE(insns), NULL);
        if (prog_fd < 0)
                return -errno;

        /* use invalid perf_event FD to get EBADF, if link is supported;
         * otherwise EINVAL should be returned
         */
        link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL);
        err = -errno; /* close() can clobber errno */

        if (link_fd >= 0)
                close(link_fd);
        close(prog_fd);

        return link_fd < 0 && err == -EBADF;
}

static int probe_kern_bpf_cookie(void)
{
        struct bpf_insn insns[] = {
                BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie),
                BPF_EXIT_INSN(),
        };
        int ret, insn_cnt = ARRAY_SIZE(insns);

        ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL);
        return probe_fd(ret);
}

static int probe_kern_btf_enum64(void)
{
        static const char strs[] = "\0enum64";
        __u32 types[] = {
                BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
        };

        return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
                                             strs, sizeof(strs)));
}

static int probe_kern_syscall_wrapper(void);

enum kern_feature_result {
        FEAT_UNKNOWN = 0,
        FEAT_SUPPORTED = 1,
        FEAT_MISSING = 2,
};

typedef int (*feature_probe_fn)(void);

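/* Registry of kernel feature probes, indexed by enum kern_feature_id; each
 * entry caches its probe's verdict after the first run.
 */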
static struct kern_feature_desc {
        const char *desc;
        feature_probe_fn probe;
        enum kern_feature_result res;
} feature_probes[__FEAT_CNT] = {
        [FEAT_PROG_NAME] = {
                "BPF program name", probe_kern_prog_name,
        },
        [FEAT_GLOBAL_DATA] = {
                "global variables", probe_kern_global_data,
        },
        [FEAT_BTF] = {
                "minimal BTF", probe_kern_btf,
        },
        [FEAT_BTF_FUNC] = {
                "BTF functions", probe_kern_btf_func,
        },
        [FEAT_BTF_GLOBAL_FUNC] = {
                "BTF global function", probe_kern_btf_func_global,
        },
        [FEAT_BTF_DATASEC] = {
                "BTF data section and variable", probe_kern_btf_datasec,
        },
        [FEAT_ARRAY_MMAP] = {
                "ARRAY map mmap()", probe_kern_array_mmap,
        },
        [FEAT_EXP_ATTACH_TYPE] = {
                "BPF_PROG_LOAD expected_attach_type attribute",
                probe_kern_exp_attach_type,
        },
        [FEAT_PROBE_READ_KERN] = {
                "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel,
        },
        [FEAT_PROG_BIND_MAP] = {
                "BPF_PROG_BIND_MAP support", probe_prog_bind_map,
        },
        [FEAT_MODULE_BTF] = {
                "module BTF support", probe_module_btf,
        },
        [FEAT_BTF_FLOAT] = {
                "BTF_KIND_FLOAT support", probe_kern_btf_float,
        },
        [FEAT_PERF_LINK] = {
                "BPF perf link support", probe_perf_link,
        },
        [FEAT_BTF_DECL_TAG] = {
                "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,
        },
        [FEAT_BTF_TYPE_TAG] = {
                "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag,
        },
        [FEAT_MEMCG_ACCOUNT] = {
                "memcg-based memory accounting", probe_memcg_account,
        },
        [FEAT_BPF_COOKIE] = {
                "BPF cookie support", probe_kern_bpf_cookie,
        },
        [FEAT_BTF_ENUM64] = {
                "BTF_KIND_ENUM64 support", probe_kern_btf_enum64,
        },
        [FEAT_SYSCALL_WRAPPER] = {
                "Kernel using syscall wrapper", probe_kern_syscall_wrapper,
        },
};

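/* Report whether the running kernel supports the given feature, lazily
 * running the corresponding probe on first use and caching the result; a
 * failed probe counts as the feature being missing.
 */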
bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
{
        struct kern_feature_desc *feat = &feature_probes[feat_id];
        int ret;

        if (obj && obj->gen_loader)
                /* When generating a loader program, assume the latest kernel
                 * to avoid extra prog_load and map_create syscalls.
                 */
                return true;

        if (READ_ONCE(feat->res) == FEAT_UNKNOWN) {
                ret = feat->probe();
                if (ret > 0) {
                        WRITE_ONCE(feat->res, FEAT_SUPPORTED);
                } else if (ret == 0) {
                        WRITE_ONCE(feat->res, FEAT_MISSING);
                } else {
                        pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret);
                        WRITE_ONCE(feat->res, FEAT_MISSING);
                }
        }

        return READ_ONCE(feat->res) == FEAT_SUPPORTED;
}

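/* Check that an existing map FD's parameters match the map's definition
 * closely enough for the FD to be reused via bpf_map__reuse_fd().
 */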
static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
{
        struct bpf_map_info map_info;
        char msg[STRERR_BUFSIZE];
        __u32 map_info_len = sizeof(map_info);
        int err;

        memset(&map_info, 0, map_info_len);
        err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
        if (err && errno == EINVAL)
                err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
        if (err) {
                pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
                        libbpf_strerror_r(errno, msg, sizeof(msg)));
                return false;
        }

        return (map_info.type == map->def.type &&
                map_info.key_size == map->def.key_size &&
                map_info.value_size == map->def.value_size &&
                map_info.max_entries == map->def.max_entries &&
                map_info.map_flags == map->def.map_flags &&
                map_info.map_extra == map->map_extra);
}

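/* Try to reuse a map pinned at map->pin_path; a missing pin is not an
 * error, but an incompatible one is.
 */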
static int
bpf_object__reuse_map(struct bpf_map *map)
{
        char *cp, errmsg[STRERR_BUFSIZE];
        int err, pin_fd;

        pin_fd = bpf_obj_get(map->pin_path);
        if (pin_fd < 0) {
                err = -errno;
                if (err == -ENOENT) {
                        pr_debug("found no pinned map to reuse at '%s'\n",
                                 map->pin_path);
                        return 0;
                }

                cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
                pr_warn("couldn't retrieve pinned map '%s': %s\n",
                        map->pin_path, cp);
                return err;
        }

        if (!map_is_reuse_compat(map, pin_fd)) {
                pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
                        map->pin_path);
                close(pin_fd);
                return -EINVAL;
        }

        err = bpf_map__reuse_fd(map, pin_fd);
        close(pin_fd);
        if (err)
                return err;

        map->pinned = true;
        pr_debug("reused pinned map at '%s'\n", map->pin_path);

        return 0;
}

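/* Copy a global data map's initial contents from its mmap()'ed region into
 * the kernel map, then freeze .rodata and .kconfig maps as read-only.
 */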
static int
bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
{
        enum libbpf_map_type map_type = map->libbpf_type;
        char *cp, errmsg[STRERR_BUFSIZE];
        int err, zero = 0;

        if (obj->gen_loader) {
                bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
                                         map->mmaped, map->def.value_size);
                if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
                        bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
                return 0;
        }
        err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
        if (err) {
                err = -errno;
                cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                pr_warn("Error setting initial map(%s) contents: %s\n",
                        map->name, cp);
                return err;
        }

        /* Freeze .rodata and .kconfig map as read-only from syscall side. */
        if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
                err = bpf_map_freeze(map->fd);
                if (err) {
                        err = -errno;
                        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                        pr_warn("Error freezing map(%s) as read-only: %s\n",
                                map->name, cp);
                        return err;
                }
        }
        return 0;
}

static void bpf_map__destroy(struct bpf_map *map);

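/* Create a single BPF map, recursively creating the inner map prototype
 * first for map-in-map types. BTF key/value type info is dropped up front
 * for map types that don't accept it, and creation is retried without BTF
 * if the kernel rejects a BTF-annotated attempt.
 */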
static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
{
        LIBBPF_OPTS(bpf_map_create_opts, create_attr);
        struct bpf_map_def *def = &map->def;
        const char *map_name = NULL;
        int err = 0;

        if (kernel_supports(obj, FEAT_PROG_NAME))
                map_name = map->name;
        create_attr.map_ifindex = map->map_ifindex;
        create_attr.map_flags = def->map_flags;
        create_attr.numa_node = map->numa_node;
        create_attr.map_extra = map->map_extra;

        if (bpf_map__is_struct_ops(map))
                create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;

        if (obj->btf && btf__fd(obj->btf) >= 0) {
                create_attr.btf_fd = btf__fd(obj->btf);
                create_attr.btf_key_type_id = map->btf_key_type_id;
                create_attr.btf_value_type_id = map->btf_value_type_id;
        }

        if (bpf_map_type__is_map_in_map(def->type)) {
                if (map->inner_map) {
                        err = bpf_object__create_map(obj, map->inner_map, true);
                        if (err) {
                                pr_warn("map '%s': failed to create inner map: %d\n",
                                        map->name, err);
                                return err;
                        }
                        map->inner_map_fd = bpf_map__fd(map->inner_map);
                }
                if (map->inner_map_fd >= 0)
                        create_attr.inner_map_fd = map->inner_map_fd;
        }

        switch (def->type) {
        case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
        case BPF_MAP_TYPE_CGROUP_ARRAY:
        case BPF_MAP_TYPE_STACK_TRACE:
        case BPF_MAP_TYPE_ARRAY_OF_MAPS:
        case BPF_MAP_TYPE_HASH_OF_MAPS:
        case BPF_MAP_TYPE_DEVMAP:
        case BPF_MAP_TYPE_DEVMAP_HASH:
        case BPF_MAP_TYPE_CPUMAP:
        case BPF_MAP_TYPE_XSKMAP:
        case BPF_MAP_TYPE_SOCKMAP:
        case BPF_MAP_TYPE_SOCKHASH:
        case BPF_MAP_TYPE_QUEUE:
        case BPF_MAP_TYPE_STACK:
                create_attr.btf_fd = 0;
                create_attr.btf_key_type_id = 0;
                create_attr.btf_value_type_id = 0;
                map->btf_key_type_id = 0;
                map->btf_value_type_id = 0;
        default:
                break;
        }

        if (obj->gen_loader) {
                bpf_gen__map_create(obj->gen_loader, def->type, map_name,
                                    def->key_size, def->value_size, def->max_entries,
                                    &create_attr, is_inner ? -1 : map - obj->maps);
                /* Pretend to have a valid FD to pass various fd >= 0 checks.
                 * This fd == 0 will not be used with any syscall and will be
                 * reset to -1 eventually.
                 */
                map->fd = 0;
        } else {
                map->fd = bpf_map_create(def->type, map_name,
                                         def->key_size, def->value_size,
                                         def->max_entries, &create_attr);
        }
        if (map->fd < 0 && (create_attr.btf_key_type_id ||
                            create_attr.btf_value_type_id)) {
                char *cp, errmsg[STRERR_BUFSIZE];

                err = -errno;
                cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
                pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
                        map->name, cp, err);
                create_attr.btf_fd = 0;
                create_attr.btf_key_type_id = 0;
                create_attr.btf_value_type_id = 0;
                map->btf_key_type_id = 0;
                map->btf_value_type_id = 0;
                map->fd = bpf_map_create(def->type, map_name,
                                         def->key_size, def->value_size,
                                         def->max_entries, &create_attr);
        }

        err = map->fd < 0 ? -errno : 0;

        if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
                if (obj->gen_loader)
                        map->inner_map->fd = -1;
                bpf_map__destroy(map->inner_map);
                zfree(&map->inner_map);
        }

        return err;
}

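/* Populate an outer (map-in-map) map's declaratively initialized slots
 * with the FDs of the corresponding inner maps.
 */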
static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
{
        const struct bpf_map *targ_map;
        unsigned int i;
        int fd, err = 0;

        for (i = 0; i < map->init_slots_sz; i++) {
                if (!map->init_slots[i])
                        continue;

                targ_map = map->init_slots[i];
                fd = bpf_map__fd(targ_map);

                if (obj->gen_loader) {
                        bpf_gen__populate_outer_map(obj->gen_loader,
                                                    map - obj->maps, i,
                                                    targ_map - obj->maps);
                } else {
                        err = bpf_map_update_elem(map->fd, &i, &fd, 0);
                }
                if (err) {
                        err = -errno;
                        pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
                                map->name, i, targ_map->name, fd, err);
                        return err;
                }
                pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
                         map->name, i, targ_map->name, fd);
        }

        zfree(&map->init_slots);
        map->init_slots_sz = 0;

        return 0;
}

static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
{
        const struct bpf_program *targ_prog;
        unsigned int i;
        int fd, err;

        if (obj->gen_loader)
                return -ENOTSUP;

        for (i = 0; i < map->init_slots_sz; i++) {
                if (!map->init_slots[i])
                        continue;

                targ_prog = map->init_slots[i];
                fd = bpf_program__fd(targ_prog);

                err = bpf_map_update_elem(map->fd, &i, &fd, 0);
                if (err) {
                        err = -errno;
                        pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
                                map->name, i, targ_prog->name, fd, err);
                        return err;
                }
                pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
                         map->name, i, targ_prog->name, fd);
        }

        zfree(&map->init_slots);
        map->init_slots_sz = 0;

        return 0;
}

static int bpf_object_init_prog_arrays(struct bpf_object *obj)
{
        struct bpf_map *map;
        int i, err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
                        continue;

                err = init_prog_array_slots(obj, map);
                if (err < 0) {
                        zclose(map->fd);
                        return err;
                }
        }
        return 0;
}

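/* PERF_EVENT_ARRAY maps with unspecified max_entries default to one slot
 * per possible CPU.
 */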
static int map_set_def_max_entries(struct bpf_map *map)
{
        if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
                int nr_cpus;

                nr_cpus = libbpf_num_possible_cpus();
                if (nr_cpus < 0) {
                        pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
                                map->name, nr_cpus);
                        return nr_cpus;
                }
                pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
                map->def.max_entries = nr_cpus;
        }

        return 0;
}

static int
bpf_object__create_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        char *cp, errmsg[STRERR_BUFSIZE];
        unsigned int i, j;
        int err;
        bool retried;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                /* To support old kernels, we skip creating global data maps
                 * (.rodata, .data, .kconfig, etc); later on, during program
                 * loading, if we detect that at least one of the to-be-loaded
                 * programs is referencing any global data map, we'll error
                 * out with program name and relocation index logged.
                 * This approach allows us to accommodate Clang emitting
                 * unnecessary .rodata.str1.1 sections for string literals,
                 * and it also allows CO-RE applications that use global
                 * variables in some of their BPF programs, but not others.
                 * If those global variable-using programs are not loaded at
                 * runtime due to bpf_program__set_autoload(prog, false),
                 * bpf_object loading will succeed just fine even on old
                 * kernels.
                 */
                if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
                        map->autocreate = false;

                if (!map->autocreate) {
                        pr_debug("map '%s': skipped auto-creating...\n", map->name);
                        continue;
                }

                err = map_set_def_max_entries(map);
                if (err)
                        goto err_out;

                retried = false;
retry:
                if (map->pin_path) {
                        err = bpf_object__reuse_map(map);
                        if (err) {
                                pr_warn("map '%s': error reusing pinned map\n",
                                        map->name);
                                goto err_out;
                        }
                        if (retried && map->fd < 0) {
                                pr_warn("map '%s': cannot find pinned map\n",
                                        map->name);
                                err = -ENOENT;
                                goto err_out;
                        }
                }

                if (map->fd >= 0) {
                        pr_debug("map '%s': skipping creation (preset fd=%d)\n",
                                 map->name, map->fd);
                } else {
                        err = bpf_object__create_map(obj, map, false);
                        if (err)
                                goto err_out;

                        pr_debug("map '%s': created successfully, fd=%d\n",
                                 map->name, map->fd);

                        if (bpf_map__is_internal(map)) {
                                err = bpf_object__populate_internal_map(obj, map);
                                if (err < 0) {
                                        zclose(map->fd);
                                        goto err_out;
                                }
                        }

                        if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
                                err = init_map_in_map_slots(obj, map);
                                if (err < 0) {
                                        zclose(map->fd);
                                        goto err_out;
                                }
                        }
                }

                if (map->pin_path && !map->pinned) {
                        err = bpf_map__pin(map, NULL);
                        if (err) {
                                zclose(map->fd);
                                if (!retried && err == -EEXIST) {
                                        retried = true;
                                        goto retry;
                                }
                                pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
                                        map->name, map->pin_path, err);
                                goto err_out;
                        }
                }
        }

        return 0;

err_out:
        cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
        pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
        pr_perm_msg(err);
        for (j = 0; j < i; j++)
                zclose(obj->maps[j].fd);
        return err;
}

static bool bpf_core_is_flavor_sep(const char *s)
{
        /* check X___Y name pattern, where X and Y are not underscores */
        return s[0] != '_' &&                                 /* X */
               s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
               s[4] != '_';                                   /* Y */
}

/* Given 'some_struct_name___with_flavor' return the length of the name
 * prefix before the last triple underscore. The struct name part after the
 * last triple underscore is ignored by BPF CO-RE relocation during
 * relocation matching.
 */
size_t bpf_core_essential_name_len(const char *name)
{
        size_t n = strlen(name);
        int i;

        for (i = n - 5; i >= 0; i--) {
                if (bpf_core_is_flavor_sep(name + i))
                        return i + 1;
        }
        return n;
}

void bpf_core_free_cands(struct bpf_core_cand_list *cands)
{
        if (!cands)
                return;

        free(cands->cands);
        free(cands);
}

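/* Append to cands every type from targ_btf (scanning from targ_start_id)
 * whose BTF kind is compatible with the local type and whose essential
 * name (flavor suffix stripped) matches the local one.
 */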
int bpf_core_add_cands(struct bpf_core_cand *local_cand,
                       size_t local_essent_len,
                       const struct btf *targ_btf,
                       const char *targ_btf_name,
                       int targ_start_id,
                       struct bpf_core_cand_list *cands)
{
        struct bpf_core_cand *new_cands, *cand;
        const struct btf_type *t, *local_t;
        const char *targ_name, *local_name;
        size_t targ_essent_len;
        int n, i;

        local_t = btf__type_by_id(local_cand->btf, local_cand->id);
        local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);

        n = btf__type_cnt(targ_btf);
        for (i = targ_start_id; i < n; i++) {
                t = btf__type_by_id(targ_btf, i);
                if (!btf_kind_core_compat(t, local_t))
                        continue;

                targ_name = btf__name_by_offset(targ_btf, t->name_off);
                if (str_is_empty(targ_name))
                        continue;

                targ_essent_len = bpf_core_essential_name_len(targ_name);
                if (targ_essent_len != local_essent_len)
                        continue;

                if (strncmp(local_name, targ_name, local_essent_len) != 0)
                        continue;

                pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
                         local_cand->id, btf_kind_str(local_t),
                         local_name, i, btf_kind_str(t), targ_name,
                         targ_btf_name);
                new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
                                              sizeof(*cands->cands));
                if (!new_cands)
                        return -ENOMEM;

                cand = &new_cands[cands->len];
                cand->btf = targ_btf;
                cand->id = i;

                cands->cands = new_cands;
                cands->len++;
        }
        return 0;
}

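/* Iterate all BTF objects loaded into the kernel and cache FDs and parsed
 * representations of kernel module BTFs on the object; runs at most once
 * per bpf_object.
 */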
5426 static int load_module_btfs(struct bpf_object *obj)
5427 {
5428         struct bpf_btf_info info;
5429         struct module_btf *mod_btf;
5430         struct btf *btf;
5431         char name[64];
5432         __u32 id = 0, len;
5433         int err, fd;
5434
5435         if (obj->btf_modules_loaded)
5436                 return 0;
5437
5438         if (obj->gen_loader)
5439                 return 0;
5440
5441         /* don't do this again, even if we find no module BTFs */
5442         obj->btf_modules_loaded = true;
5443
5444         /* kernel too old to support module BTFs */
5445         if (!kernel_supports(obj, FEAT_MODULE_BTF))
5446                 return 0;
5447
5448         while (true) {
5449                 err = bpf_btf_get_next_id(id, &id);
5450                 if (err && errno == ENOENT)
5451                         return 0;
5452                 if (err) {
5453                         err = -errno;
5454                         pr_warn("failed to iterate BTF objects: %d\n", err);
5455                         return err;
5456                 }
5457
5458                 fd = bpf_btf_get_fd_by_id(id);
5459                 if (fd < 0) {
5460                         if (errno == ENOENT)
5461                                 continue; /* expected race: BTF was unloaded */
5462                         err = -errno;
5463                         pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5464                         return err;
5465                 }
5466
5467                 len = sizeof(info);
5468                 memset(&info, 0, sizeof(info));
5469                 info.name = ptr_to_u64(name);
5470                 info.name_len = sizeof(name);
5471
5472                 err = bpf_btf_get_info_by_fd(fd, &info, &len);
5473                 if (err) {
5474                         err = -errno;
5475                         pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5476                         goto err_out;
5477                 }
5478
5479                 /* ignore non-module BTFs */
5480                 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5481                         close(fd);
5482                         continue;
5483                 }
5484
5485                 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5486                 err = libbpf_get_error(btf);
5487                 if (err) {
5488                         pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5489                                 name, id, err);
5490                         goto err_out;
5491                 }
5492
5493                 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5494                                         sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5495                 if (err)
5496                         goto err_out;
5497
5498                 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5499
5500                 mod_btf->btf = btf;
5501                 mod_btf->id = id;
5502                 mod_btf->fd = fd;
5503                 mod_btf->name = strdup(name);
5504                 if (!mod_btf->name) {
5505                         err = -ENOMEM;
5506                         goto err_out;
5507                 }
5508                 continue;
5509
5510 err_out:
5511                 close(fd);
5512                 return err;
5513         }
5514
5515         return 0;
5516 }
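
/* Editorial sketch of the same iteration pattern in isolation, for readers
 * unfamiliar with the BTF object iteration API (assumes <bpf/bpf.h>;
 * error handling trimmed; not part of libbpf):
 *
 *	__u32 id = 0;
 *	int fd;
 *
 *	while (!bpf_btf_get_next_id(id, &id)) {
 *		fd = bpf_btf_get_fd_by_id(id);
 *		if (fd < 0)
 *			continue;      (BTF object may race with unload)
 *		... query it with bpf_btf_get_info_by_fd(fd, ...) ...
 *		close(fd);
 *	}
 */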
5517
5518 static struct bpf_core_cand_list *
5519 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5520 {
5521         struct bpf_core_cand local_cand = {};
5522         struct bpf_core_cand_list *cands;
5523         const struct btf *main_btf;
5524         const struct btf_type *local_t;
5525         const char *local_name;
5526         size_t local_essent_len;
5527         int err, i;
5528
5529         local_cand.btf = local_btf;
5530         local_cand.id = local_type_id;
5531         local_t = btf__type_by_id(local_btf, local_type_id);
5532         if (!local_t)
5533                 return ERR_PTR(-EINVAL);
5534
5535         local_name = btf__name_by_offset(local_btf, local_t->name_off);
5536         if (str_is_empty(local_name))
5537                 return ERR_PTR(-EINVAL);
5538         local_essent_len = bpf_core_essential_name_len(local_name);
5539
5540         cands = calloc(1, sizeof(*cands));
5541         if (!cands)
5542                 return ERR_PTR(-ENOMEM);
5543
5544         /* Attempt to find target candidates in vmlinux BTF first */
5545         main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5546         err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5547         if (err)
5548                 goto err_out;
5549
5550         /* if vmlinux BTF has any candidate, don't go looking for module BTFs */
5551         if (cands->len)
5552                 return cands;
5553
5554         /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5555         if (obj->btf_vmlinux_override)
5556                 return cands;
5557
5558         /* now look through module BTFs, still trying to find candidates */
5559         err = load_module_btfs(obj);
5560         if (err)
5561                 goto err_out;
5562
5563         for (i = 0; i < obj->btf_module_cnt; i++) {
5564                 err = bpf_core_add_cands(&local_cand, local_essent_len,
5565                                          obj->btf_modules[i].btf,
5566                                          obj->btf_modules[i].name,
5567                                          btf__type_cnt(obj->btf_vmlinux),
5568                                          cands);
5569                 if (err)
5570                         goto err_out;
5571         }
5572
5573         return cands;
5574 err_out:
5575         bpf_core_free_cands(cands);
5576         return ERR_PTR(err);
5577 }
5578
5579 /* Check local and target types for compatibility. This check is used for
5580  * type-based CO-RE relocations and follows slightly different rules than
5581  * field-based relocations. This function assumes that root types were already
5582  * checked for name match. Beyond that initial root-level name check, names
5583  * are completely ignored. Compatibility rules are as follows:
5584  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5585  *     kind should match for local and target types (i.e., STRUCT is not
5586  *     compatible with UNION);
5587  *   - for ENUMs, the size is ignored;
5588  *   - for INT, size and signedness are ignored;
5589  *   - for ARRAY, dimensionality is ignored, element types are checked for
5590  *     compatibility recursively;
5591  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
5592  *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5593  *   - FUNC_PROTOs are compatible if they have compatible signatures: the same
5594  *     number of input args and compatible return and argument types.
5595  * These rules are not set in stone and probably will be adjusted as we get
5596  * more experience with using BPF CO-RE relocations.
5597  */
5598 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5599                               const struct btf *targ_btf, __u32 targ_id)
5600 {
5601         return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5602 }
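
/* Editorial example of the rules above: for type-based relocations a local
 * 'struct task_struct { int x; }' is compatible with any target STRUCT
 * named task_struct, regardless of its members, while a target
 * 'union task_struct' is not (kind mismatch); likewise a local 'int'
 * matches a target 'unsigned short', since INT size and signedness are
 * ignored.
 */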
5603
5604 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5605                          const struct btf *targ_btf, __u32 targ_id)
5606 {
5607         return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5608 }
5609
5610 static size_t bpf_core_hash_fn(const long key, void *ctx)
5611 {
5612         return key;
5613 }
5614
5615 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5616 {
5617         return k1 == k2;
5618 }
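
/* The candidate cache below is keyed by local BTF type ID (a plain
 * integer), so an identity hash and direct equality suffice. Editorial
 * usage sketch, mirroring bpf_object__relocate_core() further down:
 *
 *	struct hashmap *cache;
 *
 *	cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
 *	if (!IS_ERR(cache))
 *		err = hashmap__set(cache, local_id, cands, NULL, NULL);
 */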
5619
5620 static int record_relo_core(struct bpf_program *prog,
5621                             const struct bpf_core_relo *core_relo, int insn_idx)
5622 {
5623         struct reloc_desc *relos, *relo;
5624
5625         relos = libbpf_reallocarray(prog->reloc_desc,
5626                                     prog->nr_reloc + 1, sizeof(*relos));
5627         if (!relos)
5628                 return -ENOMEM;
5629         relo = &relos[prog->nr_reloc];
5630         relo->type = RELO_CORE;
5631         relo->insn_idx = insn_idx;
5632         relo->core_relo = core_relo;
5633         prog->reloc_desc = relos;
5634         prog->nr_reloc++;
5635         return 0;
5636 }
5637
5638 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5639 {
5640         struct reloc_desc *relo;
5641         int i;
5642
5643         for (i = 0; i < prog->nr_reloc; i++) {
5644                 relo = &prog->reloc_desc[i];
5645                 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5646                         continue;
5647
5648                 return relo->core_relo;
5649         }
5650
5651         return NULL;
5652 }
5653
5654 static int bpf_core_resolve_relo(struct bpf_program *prog,
5655                                  const struct bpf_core_relo *relo,
5656                                  int relo_idx,
5657                                  const struct btf *local_btf,
5658                                  struct hashmap *cand_cache,
5659                                  struct bpf_core_relo_res *targ_res)
5660 {
5661         struct bpf_core_spec specs_scratch[3] = {};
5662         struct bpf_core_cand_list *cands = NULL;
5663         const char *prog_name = prog->name;
5664         const struct btf_type *local_type;
5665         const char *local_name;
5666         __u32 local_id = relo->type_id;
5667         int err;
5668
5669         local_type = btf__type_by_id(local_btf, local_id);
5670         if (!local_type)
5671                 return -EINVAL;
5672
5673         local_name = btf__name_by_offset(local_btf, local_type->name_off);
5674         if (!local_name)
5675                 return -EINVAL;
5676
5677         if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5678             !hashmap__find(cand_cache, local_id, &cands)) {
5679                 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5680                 if (IS_ERR(cands)) {
5681                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5682                                 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5683                                 local_name, PTR_ERR(cands));
5684                         return PTR_ERR(cands);
5685                 }
5686                 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
5687                 if (err) {
5688                         bpf_core_free_cands(cands);
5689                         return err;
5690                 }
5691         }
5692
5693         return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5694                                        targ_res);
5695 }
5696
5697 static int
5698 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5699 {
5700         const struct btf_ext_info_sec *sec;
5701         struct bpf_core_relo_res targ_res;
5702         const struct bpf_core_relo *rec;
5703         const struct btf_ext_info *seg;
5704         struct hashmap_entry *entry;
5705         struct hashmap *cand_cache = NULL;
5706         struct bpf_program *prog;
5707         struct bpf_insn *insn;
5708         const char *sec_name;
5709         int i, err = 0, insn_idx, sec_idx, sec_num;
5710
5711         if (obj->btf_ext->core_relo_info.len == 0)
5712                 return 0;
5713
5714         if (targ_btf_path) {
5715                 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5716                 err = libbpf_get_error(obj->btf_vmlinux_override);
5717                 if (err) {
5718                         pr_warn("failed to parse target BTF: %d\n", err);
5719                         return err;
5720                 }
5721         }
5722
5723         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5724         if (IS_ERR(cand_cache)) {
5725                 err = PTR_ERR(cand_cache);
5726                 goto out;
5727         }
5728
5729         seg = &obj->btf_ext->core_relo_info;
5730         sec_num = 0;
5731         for_each_btf_ext_sec(seg, sec) {
5732                 sec_idx = seg->sec_idxs[sec_num];
5733                 sec_num++;
5734
5735                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5736                 if (str_is_empty(sec_name)) {
5737                         err = -EINVAL;
5738                         goto out;
5739                 }
5740
5741                 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5742
5743                 for_each_btf_ext_rec(seg, sec, i, rec) {
5744                         if (rec->insn_off % BPF_INSN_SZ)
5745                                 return -EINVAL;
5746                         insn_idx = rec->insn_off / BPF_INSN_SZ;
5747                         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5748                         if (!prog) {
5749                                 /* When a __weak subprog is "overridden" by another instance
5750                                  * of the subprog from a different object file, the linker
5751                                  * still appends all the .BTF.ext info that used to belong
5752                                  * to that eliminated subprogram.
5753                                  * This is similar to what the x86-64 linker does for
5754                                  * relocations. So ignore such relocations, just like we
5755                                  * ignore subprog instructions when discovering subprograms.
5756                                  */
5757                                 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5758                                          sec_name, i, insn_idx);
5759                                 continue;
5760                         }
5761                         /* no need to apply CO-RE relocation if the program is
5762                          * not going to be loaded
5763                          */
5764                         if (!prog->autoload)
5765                                 continue;
5766
5767                         /* adjust insn_idx from section frame of reference to the local
5768                          * program's frame of reference; (sub-)program code is not yet
5769                          * relocated, so it's enough to just subtract in-section offset
5770                          */
5771                         insn_idx = insn_idx - prog->sec_insn_off;
5772                         if (insn_idx >= prog->insns_cnt)
5773                                 return -EINVAL;
5774                         insn = &prog->insns[insn_idx];
5775
5776                         err = record_relo_core(prog, rec, insn_idx);
5777                         if (err) {
5778                                 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5779                                         prog->name, i, err);
5780                                 goto out;
5781                         }
5782
5783                         if (prog->obj->gen_loader)
5784                                 continue;
5785
5786                         err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5787                         if (err) {
5788                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5789                                         prog->name, i, err);
5790                                 goto out;
5791                         }
5792
5793                         err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5794                         if (err) {
5795                                 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5796                                         prog->name, i, insn_idx, err);
5797                                 goto out;
5798                         }
5799                 }
5800         }
5801
5802 out:
5803         /* obj->btf_vmlinux and module BTFs are freed after object load */
5804         btf__free(obj->btf_vmlinux_override);
5805         obj->btf_vmlinux_override = NULL;
5806
5807         if (!IS_ERR_OR_NULL(cand_cache)) {
5808                 hashmap__for_each_entry(cand_cache, entry, i) {
5809                         bpf_core_free_cands(entry->pvalue);
5810                 }
5811                 hashmap__free(cand_cache);
5812         }
5813         return err;
5814 }
5815
5816 /* base map load ldimm64 special constant, used also for log fixup logic */
5817 #define POISON_LDIMM64_MAP_BASE 2001000000
5818 #define POISON_LDIMM64_MAP_PFX "200100"
5819
5820 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
5821                                int insn_idx, struct bpf_insn *insn,
5822                                int map_idx, const struct bpf_map *map)
5823 {
5824         int i;
5825
5826         pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
5827                  prog->name, relo_idx, insn_idx, map_idx, map->name);
5828
5829         /* we turn a single ldimm64 into two identical invalid calls */
5830         for (i = 0; i < 2; i++) {
5831                 insn->code = BPF_JMP | BPF_CALL;
5832                 insn->dst_reg = 0;
5833                 insn->src_reg = 0;
5834                 insn->off = 0;
5835                 /* if this instruction is reachable (not dead code),
5836                  * verifier will complain with something like:
5837                  * invalid func unknown#2001000123
5838                  * where lower 123 is map index into obj->maps[] array
5839                  */
5840                 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
5841
5842                 insn++;
5843         }
5844 }
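
/* Editorial note on the encoding above: an ldimm64 occupies two 8-byte
 * instruction slots, hence both slots get the same invalid call. For
 * example, poisoning a load of map #3 sets imm to 2001000000 + 3, so a
 * reachable insn makes the verifier print
 * "invalid func unknown#2001000003", which the log fixup code matches
 * via POISON_LDIMM64_MAP_PFX.
 */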
5845
5846 /* unresolved kfunc call special constant, used also for log fixup logic */
5847 #define POISON_CALL_KFUNC_BASE 2002000000
5848 #define POISON_CALL_KFUNC_PFX "2002"
5849
5850 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
5851                               int insn_idx, struct bpf_insn *insn,
5852                               int ext_idx, const struct extern_desc *ext)
5853 {
5854         pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
5855                  prog->name, relo_idx, insn_idx, ext->name);
5856
5857         /* we turn the kfunc call into an invalid helper call with an identifiable constant */
5858         insn->code = BPF_JMP | BPF_CALL;
5859         insn->dst_reg = 0;
5860         insn->src_reg = 0;
5861         insn->off = 0;
5862         /* if this instruction is reachable (not dead code),
5863          * verifier will complain with something like:
5864          * invalid func unknown#2002000123
5865          * where lower 123 is extern index into obj->externs[] array
5866          */
5867         insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
5868 }
5869
5870 /* Relocate data references within program code:
5871  *  - map references;
5872  *  - global variable references;
5873  *  - extern references.
5874  */
5875 static int
5876 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5877 {
5878         int i;
5879
5880         for (i = 0; i < prog->nr_reloc; i++) {
5881                 struct reloc_desc *relo = &prog->reloc_desc[i];
5882                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5883                 const struct bpf_map *map;
5884                 struct extern_desc *ext;
5885
5886                 switch (relo->type) {
5887                 case RELO_LD64:
5888                         map = &obj->maps[relo->map_idx];
5889                         if (obj->gen_loader) {
5890                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
5891                                 insn[0].imm = relo->map_idx;
5892                         } else if (map->autocreate) {
5893                                 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5894                                 insn[0].imm = map->fd;
5895                         } else {
5896                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5897                                                    relo->map_idx, map);
5898                         }
5899                         break;
5900                 case RELO_DATA:
5901                         map = &obj->maps[relo->map_idx];
5902                         insn[1].imm = insn[0].imm + relo->sym_off;
5903                         if (obj->gen_loader) {
5904                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5905                                 insn[0].imm = relo->map_idx;
5906                         } else if (map->autocreate) {
5907                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5908                                 insn[0].imm = map->fd;
5909                         } else {
5910                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5911                                                    relo->map_idx, map);
5912                         }
5913                         break;
5914                 case RELO_EXTERN_LD64:
5915                         ext = &obj->externs[relo->ext_idx];
5916                         if (ext->type == EXT_KCFG) {
5917                                 if (obj->gen_loader) {
5918                                         insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5919                                         insn[0].imm = obj->kconfig_map_idx;
5920                                 } else {
5921                                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5922                                         insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5923                                 }
5924                                 insn[1].imm = ext->kcfg.data_off;
5925                         } else /* EXT_KSYM */ {
5926                                 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
5927                                         insn[0].src_reg = BPF_PSEUDO_BTF_ID;
5928                                         insn[0].imm = ext->ksym.kernel_btf_id;
5929                                         insn[1].imm = ext->ksym.kernel_btf_obj_fd;
5930                                 } else { /* typeless ksyms or unresolved typed ksyms */
5931                                         insn[0].imm = (__u32)ext->ksym.addr;
5932                                         insn[1].imm = ext->ksym.addr >> 32;
5933                                 }
5934                         }
5935                         break;
5936                 case RELO_EXTERN_CALL:
5937                         ext = &obj->externs[relo->ext_idx];
5938                         insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
5939                         if (ext->is_set) {
5940                                 insn[0].imm = ext->ksym.kernel_btf_id;
5941                                 insn[0].off = ext->ksym.btf_fd_idx;
5942                         } else { /* unresolved weak kfunc call */
5943                                 poison_kfunc_call(prog, i, relo->insn_idx, insn,
5944                                                   relo->ext_idx, ext);
5945                         }
5946                         break;
5947                 case RELO_SUBPROG_ADDR:
5948                         if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
5949                                 pr_warn("prog '%s': relo #%d: bad insn\n",
5950                                         prog->name, i);
5951                                 return -EINVAL;
5952                         }
5953                         /* handled already */
5954                         break;
5955                 case RELO_CALL:
5956                         /* handled already */
5957                         break;
5958                 case RELO_CORE:
5959                         /* will be handled by bpf_program_record_relos() */
5960                         break;
5961                 default:
5962                         pr_warn("prog '%s': relo #%d: bad relo type %d\n",
5963                                 prog->name, i, relo->type);
5964                         return -EINVAL;
5965                 }
5966         }
5967
5968         return 0;
5969 }
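
/* Editorial example of the RELO_LD64 and RELO_DATA rewrites above, for a
 * hypothetical auto-created map with fd 7 and a variable at offset 16
 * within its value (assuming a zero compiler-emitted addend in
 * insn[0].imm):
 *
 *	RELO_LD64:  insn[0].src_reg = BPF_PSEUDO_MAP_FD;
 *	            insn[0].imm = 7;
 *	RELO_DATA:  insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
 *	            insn[0].imm = 7;
 *	            insn[1].imm = 16;
 */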
5970
5971 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
5972                                     const struct bpf_program *prog,
5973                                     const struct btf_ext_info *ext_info,
5974                                     void **prog_info, __u32 *prog_rec_cnt,
5975                                     __u32 *prog_rec_sz)
5976 {
5977         void *copy_start = NULL, *copy_end = NULL;
5978         void *rec, *rec_end, *new_prog_info;
5979         const struct btf_ext_info_sec *sec;
5980         size_t old_sz, new_sz;
5981         int i, sec_num, sec_idx, off_adj;
5982
5983         sec_num = 0;
5984         for_each_btf_ext_sec(ext_info, sec) {
5985                 sec_idx = ext_info->sec_idxs[sec_num];
5986                 sec_num++;
5987                 if (prog->sec_idx != sec_idx)
5988                         continue;
5989
5990                 for_each_btf_ext_rec(ext_info, sec, i, rec) {
5991                         __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
5992
5993                         if (insn_off < prog->sec_insn_off)
5994                                 continue;
5995                         if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
5996                                 break;
5997
5998                         if (!copy_start)
5999                                 copy_start = rec;
6000                         copy_end = rec + ext_info->rec_size;
6001                 }
6002
6003                 if (!copy_start)
6004                         return -ENOENT;
6005
6006                 /* append func/line info of a given (sub-)program to the main
6007                  * program's func/line info
6008                  */
6009                 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
6010                 new_sz = old_sz + (copy_end - copy_start);
6011                 new_prog_info = realloc(*prog_info, new_sz);
6012                 if (!new_prog_info)
6013                         return -ENOMEM;
6014                 *prog_info = new_prog_info;
6015                 *prog_rec_cnt = new_sz / ext_info->rec_size;
6016                 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
6017
6018                 /* Kernel instruction offsets are in units of 8-byte
6019                  * instructions, while .BTF.ext instruction offsets generated
6020                  * by Clang are in units of bytes. So convert Clang offsets
6021                  * into kernel offsets and adjust offset according to program
6022                  * relocated position.
6023                  */
6024                 off_adj = prog->sub_insn_off - prog->sec_insn_off;
6025                 rec = new_prog_info + old_sz;
6026                 rec_end = new_prog_info + new_sz;
6027                 for (; rec < rec_end; rec += ext_info->rec_size) {
6028                         __u32 *insn_off = rec;
6029
6030                         *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
6031                 }
6032                 *prog_rec_sz = ext_info->rec_size;
6033                 return 0;
6034         }
6035
6036         return -ENOENT;
6037 }
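
/* Editorial worked example for the conversion above: a .BTF.ext record
 * with Clang byte offset 24, in a subprog that starts at section insn 2
 * (sec_insn_off == 2) and was appended at main-prog insn 100
 * (sub_insn_off == 100), ends up as 24 / BPF_INSN_SZ + (100 - 2) == 101,
 * i.e. the record now addresses the instruction's final position within
 * the relocated main program.
 */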
6038
6039 static int
6040 reloc_prog_func_and_line_info(const struct bpf_object *obj,
6041                               struct bpf_program *main_prog,
6042                               const struct bpf_program *prog)
6043 {
6044         int err;
6045
6046         /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
6047          * support func/line info
6048          */
6049         if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
6050                 return 0;
6051
6052         /* only attempt func info relocation if main program's func_info
6053          * relocation was successful
6054          */
6055         if (main_prog != prog && !main_prog->func_info)
6056                 goto line_info;
6057
6058         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
6059                                        &main_prog->func_info,
6060                                        &main_prog->func_info_cnt,
6061                                        &main_prog->func_info_rec_size);
6062         if (err) {
6063                 if (err != -ENOENT) {
6064                         pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
6065                                 prog->name, err);
6066                         return err;
6067                 }
6068                 if (main_prog->func_info) {
6069                         /*
6070                          * Some info has already been found, but the last
6071                          * btf_ext relocation failed. We must error out.
6072                          */
6073                         pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
6074                         return err;
6075                 }
6076                 /* Failed to load even the first info record; ignore the rest. */
6077                 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
6078                         prog->name);
6079         }
6080
6081 line_info:
6082         /* don't relocate line info if main program's relocation failed */
6083         if (main_prog != prog && !main_prog->line_info)
6084                 return 0;
6085
6086         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
6087                                        &main_prog->line_info,
6088                                        &main_prog->line_info_cnt,
6089                                        &main_prog->line_info_rec_size);
6090         if (err) {
6091                 if (err != -ENOENT) {
6092                         pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
6093                                 prog->name, err);
6094                         return err;
6095                 }
6096                 if (main_prog->line_info) {
6097                         /*
6098                          * Some info has already been found, but the last
6099                          * btf_ext relocation failed. We must error out.
6100                          */
6101                         pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
6102                         return err;
6103                 }
6104                 /* Failed to load even the first info record; ignore the rest. */
6105                 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
6106                         prog->name);
6107         }
6108         return 0;
6109 }
6110
6111 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
6112 {
6113         size_t insn_idx = *(const size_t *)key;
6114         const struct reloc_desc *relo = elem;
6115
6116         if (insn_idx == relo->insn_idx)
6117                 return 0;
6118         return insn_idx < relo->insn_idx ? -1 : 1;
6119 }
6120
6121 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
6122 {
6123         if (!prog->nr_reloc)
6124                 return NULL;
6125         return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
6126                        sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
6127 }
6128
6129 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
6130 {
6131         int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
6132         struct reloc_desc *relos;
6133         int i;
6134
6135         if (main_prog == subprog)
6136                 return 0;
6137         relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
6138         if (!relos)
6139                 return -ENOMEM;
6140         if (subprog->nr_reloc)
6141                 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
6142                        sizeof(*relos) * subprog->nr_reloc);
6143
6144         for (i = main_prog->nr_reloc; i < new_cnt; i++)
6145                 relos[i].insn_idx += subprog->sub_insn_off;
6146         /* After insn_idx adjustment the 'relos' array is still sorted
6147          * by insn_idx and doesn't break bsearch.
6148          */
6149         main_prog->reloc_desc = relos;
6150         main_prog->nr_reloc = new_cnt;
6151         return 0;
6152 }
6153
6154 static int
6155 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
6156                        struct bpf_program *prog)
6157 {
6158         size_t sub_insn_idx, insn_idx, new_cnt;
6159         struct bpf_program *subprog;
6160         struct bpf_insn *insns, *insn;
6161         struct reloc_desc *relo;
6162         int err;
6163
6164         err = reloc_prog_func_and_line_info(obj, main_prog, prog);
6165         if (err)
6166                 return err;
6167
6168         for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
6169                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6170                 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
6171                         continue;
6172
6173                 relo = find_prog_insn_relo(prog, insn_idx);
6174                 if (relo && relo->type == RELO_EXTERN_CALL)
6175                         /* kfunc relocations will be handled later
6176                          * in bpf_object__relocate_data()
6177                          */
6178                         continue;
6179                 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
6180                         pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
6181                                 prog->name, insn_idx, relo->type);
6182                         return -LIBBPF_ERRNO__RELOC;
6183                 }
6184                 if (relo) {
6185                         /* sub-program instruction index is a combination of
6186                          * an offset of a symbol pointed to by relocation and
6187                          * call instruction's imm field; for global functions,
6188                          * call always has imm = -1, but for static functions
6189                          * relocation is against STT_SECTION and insn->imm
6190                          * points to a start of a static function
6191                          *
6192                          * for subprog addr relocation, the relo->sym_off + insn->imm is
6193                          * the byte offset in the corresponding section.
6194                          */
6195                         if (relo->type == RELO_CALL)
6196                                 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6197                         else
6198                                 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6199                 } else if (insn_is_pseudo_func(insn)) {
6200                         /*
6201                          * RELO_SUBPROG_ADDR relo is always emitted even if both
6202                          * functions are in the same section, so we should never get here.
6203                          */
6204                         pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6205                                 prog->name, insn_idx);
6206                         return -LIBBPF_ERRNO__RELOC;
6207                 } else {
6208                         /* if subprogram call is to a static function within
6209                          * the same ELF section, there won't be any relocation
6210                          * emitted, but that also means no additional offset is
6211                          * necessary: insn->imm is relative to the instruction's
6212                          * original position within the section
6213                          */
6214                         sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6215                 }
6216
6217                 /* we enforce that sub-programs are in the .text section */
6218                 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6219                 if (!subprog) {
6220                         pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6221                                 prog->name);
6222                         return -LIBBPF_ERRNO__RELOC;
6223                 }
6224
6225                 /* if it's the first call instruction calling into this
6226                  * subprogram (meaning this subprog hasn't been processed
6227                  * yet) within the context of current main program:
6228                  *   - append it at the end of the main program's instructions;
6229                  *   - process it recursively, while the current program is put on hold;
6230                  *   - if that subprogram calls some other not-yet-processed
6231                  *   subprogram, the same thing happens recursively until
6232                  *   there are no more unprocessed subprograms left to append
6233                  *   and relocate.
6234                  */
6235                 if (subprog->sub_insn_off == 0) {
6236                         subprog->sub_insn_off = main_prog->insns_cnt;
6237
6238                         new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
6239                         insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
6240                         if (!insns) {
6241                                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
6242                                 return -ENOMEM;
6243                         }
6244                         main_prog->insns = insns;
6245                         main_prog->insns_cnt = new_cnt;
6246
6247                         memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
6248                                subprog->insns_cnt * sizeof(*insns));
6249
6250                         pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
6251                                  main_prog->name, subprog->insns_cnt, subprog->name);
6252
6253                         /* The subprog insns are now appended. Append its relos too. */
6254                         err = append_subprog_relos(main_prog, subprog);
6255                         if (err)
6256                                 return err;
6257                         err = bpf_object__reloc_code(obj, main_prog, subprog);
6258                         if (err)
6259                                 return err;
6260                 }
6261
6262                 /* main_prog->insns memory could have been re-allocated, so
6263                  * calculate pointer again
6264                  */
6265                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6266                 /* calculate correct instruction position within current main
6267                  * prog; each main prog can have a different set of
6268                  * subprograms appended (potentially in different order as
6269                  * well), so position of any subprog can be different for
6270                  * different main programs
6271                  */
6272                 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6273
6274                 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6275                          prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6276         }
6277
6278         return 0;
6279 }
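
/* Editorial worked example for the imm rewrite at the end of
 * bpf_object__reloc_code(): if the call insn sits at main-prog position
 * 10 (prog->sub_insn_off + insn_idx == 10) and the callee was appended at
 * position 50 (subprog->sub_insn_off == 50), then imm = 50 - 10 - 1 == 39,
 * because a BPF call target is relative to the instruction following the
 * call.
 */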
6280
6281 /*
6282  * Relocate sub-program calls.
6283  *
6284  * Algorithm operates as follows. Each entry-point BPF program (referred to as
6285  * main prog) is processed separately. Each subprog (a non-entry function
6286  * that can be called from either entry progs or other subprogs) gets its
6287  * sub_insn_off reset to zero. This serves as an indicator that this subprogram
6288  * hasn't yet been appended and relocated within the current main prog. Once it's
6289  * relocated, sub_insn_off will point at the position within the current main prog
6290  * where given subprog was appended. This will further be used to relocate all
6291  * the call instructions jumping into this subprog.
6292  *
6293  * We start with main program and process all call instructions. If the call
6294  * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6295  * is zero), subprog instructions are appended at the end of main program's
6296  * instruction array. Then main program is "put on hold" while we recursively
6297  * process newly appended subprogram. If that subprogram calls into another
6298  * subprogram that hasn't been appended, new subprogram is appended again to
6299  * the *main* prog's instructions (subprog's instructions are always left
6300  * untouched, as they need to be in unmodified state for subsequent main progs
6301  * and subprog instructions are always sent only as part of a main prog) and
6302  * the process continues recursively. Once all the subprogs called from a main
6303  * prog or any of its subprogs are appended (and relocated), all their
6304  * positions within finalized instructions array are known, so it's easy to
6305  * rewrite call instructions with correct relative offsets, corresponding to
6306  * desired target subprog.
6307  *
6308  * It's important to realize that some subprogs might not be called from a
6309  * given main prog or any of its called/used subprogs. Those will keep their
6310  * subprog->sub_insn_off as zero at all times and won't be appended to current
6311  * main prog and won't be relocated within the context of current main prog.
6312  * They might still be used from other main progs later.
6313  *
6314  * Visually this process can be shown as below. Suppose we have two main
6315  * programs mainA and mainB and BPF object contains three subprogs: subA,
6316  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6317  * subC both call subB:
6318  *
6319  *        +--------+ +-------+
6320  *        |        v v       |
6321  *     +--+---+ +--+-+-+ +---+--+
6322  *     | subA | | subB | | subC |
6323  *     +--+---+ +------+ +---+--+
6324  *        ^                  ^
6325  *        |                  |
6326  *    +---+-------+   +------+----+
6327  *    |   mainA   |   |   mainB   |
6328  *    +-----------+   +-----------+
6329  *
6330  * We'll start relocating mainA, will find subA, append it and start
6331  * processing sub A recursively:
6332  *
6333  *    +-----------+------+
6334  *    |   mainA   | subA |
6335  *    +-----------+------+
6336  *
6337  * At this point we notice that subB is used from subA, so we append it and
6338  * relocate (there are no further subcalls from subB):
6339  *
6340  *    +-----------+------+------+
6341  *    |   mainA   | subA | subB |
6342  *    +-----------+------+------+
6343  *
6344  * At this point, we relocate subA calls, then go one level up and finish with
6345  * relocating mainA calls. mainA is done.
6346  *
6347  * For mainB process is similar but results in different order. We start with
6348  * mainB and skip subA and subB, as mainB never calls them (at least
6349  * directly), but we see subC is needed, so we append and start processing it:
6350  *
6351  *    +-----------+------+
6352  *    |   mainB   | subC |
6353  *    +-----------+------+
6354  * Now we see subC needs subB, so we go back to it, append and relocate it:
6355  *
6356  *    +-----------+------+------+
6357  *    |   mainB   | subC | subB |
6358  *    +-----------+------+------+
6359  *
6360  * At this point we unwind recursion, relocate calls in subC, then in mainB.
6361  */
6362 static int
6363 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6364 {
6365         struct bpf_program *subprog;
6366         int i, err;
6367
6368         /* mark all subprogs as not relocated (yet) within the context of
6369          * current main program
6370          */
6371         for (i = 0; i < obj->nr_programs; i++) {
6372                 subprog = &obj->programs[i];
6373                 if (!prog_is_subprog(obj, subprog))
6374                         continue;
6375
6376                 subprog->sub_insn_off = 0;
6377         }
6378
6379         err = bpf_object__reloc_code(obj, prog, prog);
6380         if (err)
6381                 return err;
6382
6383         return 0;
6384 }
6385
6386 static void
6387 bpf_object__free_relocs(struct bpf_object *obj)
6388 {
6389         struct bpf_program *prog;
6390         int i;
6391
6392         /* free up relocation descriptors */
6393         for (i = 0; i < obj->nr_programs; i++) {
6394                 prog = &obj->programs[i];
6395                 zfree(&prog->reloc_desc);
6396                 prog->nr_reloc = 0;
6397         }
6398 }
6399
6400 static int cmp_relocs(const void *_a, const void *_b)
6401 {
6402         const struct reloc_desc *a = _a;
6403         const struct reloc_desc *b = _b;
6404
6405         if (a->insn_idx != b->insn_idx)
6406                 return a->insn_idx < b->insn_idx ? -1 : 1;
6407
6408         /* no two relocations should have the same insn_idx, but ... */
6409         if (a->type != b->type)
6410                 return a->type < b->type ? -1 : 1;
6411
6412         return 0;
6413 }
6414
6415 static void bpf_object__sort_relos(struct bpf_object *obj)
6416 {
6417         int i;
6418
6419         for (i = 0; i < obj->nr_programs; i++) {
6420                 struct bpf_program *p = &obj->programs[i];
6421
6422                 if (!p->nr_reloc)
6423                         continue;
6424
6425                 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6426         }
6427 }
6428
6429 static int
6430 bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6431 {
6432         struct bpf_program *prog;
6433         size_t i, j;
6434         int err;
6435
6436         if (obj->btf_ext) {
6437                 err = bpf_object__relocate_core(obj, targ_btf_path);
6438                 if (err) {
6439                         pr_warn("failed to perform CO-RE relocations: %d\n",
6440                                 err);
6441                         return err;
6442                 }
6443                 bpf_object__sort_relos(obj);
6444         }
6445
6446         /* Before relocating calls, pre-process relocations and mark
6447          * the few ld_imm64 instructions that point to subprogs.
6448          * Otherwise bpf_object__reloc_code() would later have to consider
6449          * all ld_imm64 insns as relocation candidates. That would
6450          * slow down relocation, since the number of find_prog_insn_relo()
6451          * calls would increase and most of them would fail to find a relo.
6452          */
6453         for (i = 0; i < obj->nr_programs; i++) {
6454                 prog = &obj->programs[i];
6455                 for (j = 0; j < prog->nr_reloc; j++) {
6456                         struct reloc_desc *relo = &prog->reloc_desc[j];
6457                         struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6458
6459                         /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6460                         if (relo->type == RELO_SUBPROG_ADDR)
6461                                 insn[0].src_reg = BPF_PSEUDO_FUNC;
6462                 }
6463         }
6464
6465         /* relocate subprogram calls and append used subprograms to main
6466          * programs; each copy of subprogram code needs to be relocated
6467          * differently for each main program, because its code location might
6468          * have changed.
6469          * Append subprog relos to main programs to allow data relos to be
6470          * processed after text is completely relocated.
6471          */
6472         for (i = 0; i < obj->nr_programs; i++) {
6473                 prog = &obj->programs[i];
6474                 /* sub-program's sub-calls are relocated within the context of
6475                  * its main program only
6476                  */
6477                 if (prog_is_subprog(obj, prog))
6478                         continue;
6479                 if (!prog->autoload)
6480                         continue;
6481
6482                 err = bpf_object__relocate_calls(obj, prog);
6483                 if (err) {
6484                         pr_warn("prog '%s': failed to relocate calls: %d\n",
6485                                 prog->name, err);
6486                         return err;
6487                 }
6488         }
6489         /* Process data relos for main programs */
6490         for (i = 0; i < obj->nr_programs; i++) {
6491                 prog = &obj->programs[i];
6492                 if (prog_is_subprog(obj, prog))
6493                         continue;
6494                 if (!prog->autoload)
6495                         continue;
6496                 err = bpf_object__relocate_data(obj, prog);
6497                 if (err) {
6498                         pr_warn("prog '%s': failed to relocate data references: %d\n",
6499                                 prog->name, err);
6500                         return err;
6501                 }
6502         }
6503
6504         return 0;
6505 }
6506
6507 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6508                                             Elf64_Shdr *shdr, Elf_Data *data);
6509
6510 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6511                                          Elf64_Shdr *shdr, Elf_Data *data)
6512 {
6513         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6514         int i, j, nrels, new_sz;
6515         const struct btf_var_secinfo *vi = NULL;
6516         const struct btf_type *sec, *var, *def;
6517         struct bpf_map *map = NULL, *targ_map = NULL;
6518         struct bpf_program *targ_prog = NULL;
6519         bool is_prog_array, is_map_in_map;
6520         const struct btf_member *member;
6521         const char *name, *mname, *type;
6522         unsigned int moff;
6523         Elf64_Sym *sym;
6524         Elf64_Rel *rel;
6525         void *tmp;
6526
6527         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6528                 return -EINVAL;
6529         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6530         if (!sec)
6531                 return -EINVAL;
6532
6533         nrels = shdr->sh_size / shdr->sh_entsize;
6534         for (i = 0; i < nrels; i++) {
6535                 rel = elf_rel_by_idx(data, i);
6536                 if (!rel) {
6537                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6538                         return -LIBBPF_ERRNO__FORMAT;
6539                 }
6540
6541                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
6542                 if (!sym) {
6543                         pr_warn(".maps relo #%d: symbol %zx not found\n",
6544                                 i, (size_t)ELF64_R_SYM(rel->r_info));
6545                         return -LIBBPF_ERRNO__FORMAT;
6546                 }
6547                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
6548
6549                 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
6550                          i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
6551                          (size_t)rel->r_offset, sym->st_name, name);
6552
6553                 for (j = 0; j < obj->nr_maps; j++) {
6554                         map = &obj->maps[j];
6555                         if (map->sec_idx != obj->efile.btf_maps_shndx)
6556                                 continue;
6557
6558                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
6559                         if (vi->offset <= rel->r_offset &&
6560                             rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6561                                 break;
6562                 }
6563                 if (j == obj->nr_maps) {
6564                         pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
6565                                 i, name, (size_t)rel->r_offset);
6566                         return -EINVAL;
6567                 }
6568
6569                 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
6570                 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
6571                 type = is_map_in_map ? "map" : "prog";
6572                 if (is_map_in_map) {
6573                         if (sym->st_shndx != obj->efile.btf_maps_shndx) {
6574                                 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6575                                         i, name);
6576                                 return -LIBBPF_ERRNO__RELOC;
6577                         }
6578                         if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6579                             map->def.key_size != sizeof(int)) {
6580                                 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6581                                         i, map->name, sizeof(int));
6582                                 return -EINVAL;
6583                         }
6584                         targ_map = bpf_object__find_map_by_name(obj, name);
6585                         if (!targ_map) {
6586                                 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
6587                                         i, name);
6588                                 return -ESRCH;
6589                         }
6590                 } else if (is_prog_array) {
6591                         targ_prog = bpf_object__find_program_by_name(obj, name);
6592                         if (!targ_prog) {
6593                                 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
6594                                         i, name);
6595                                 return -ESRCH;
6596                         }
6597                         if (targ_prog->sec_idx != sym->st_shndx ||
6598                             targ_prog->sec_insn_off * 8 != sym->st_value ||
6599                             prog_is_subprog(obj, targ_prog)) {
6600                                 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
6601                                         i, name);
6602                                 return -LIBBPF_ERRNO__RELOC;
6603                         }
6604                 } else {
6605                         return -EINVAL;
6606                 }
6607
6608                 var = btf__type_by_id(obj->btf, vi->type);
6609                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6610                 if (btf_vlen(def) == 0)
6611                         return -EINVAL;
6612                 member = btf_members(def) + btf_vlen(def) - 1;
6613                 mname = btf__name_by_offset(obj->btf, member->name_off);
6614                 if (strcmp(mname, "values"))
6615                         return -EINVAL;
6616
6617                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6618                 if (rel->r_offset - vi->offset < moff)
6619                         return -EINVAL;
6620
6621                 moff = rel->r_offset - vi->offset - moff;
6622                 /* here we use BPF pointer size, which is always 64 bit, as we
6623                  * are parsing ELF that was built for the BPF target
6624                  */
6625                 if (moff % bpf_ptr_sz)
6626                         return -EINVAL;
6627                 moff /= bpf_ptr_sz;
6628                 if (moff >= map->init_slots_sz) {
6629                         new_sz = moff + 1;
6630                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6631                         if (!tmp)
6632                                 return -ENOMEM;
6633                         map->init_slots = tmp;
6634                         memset(map->init_slots + map->init_slots_sz, 0,
6635                                (new_sz - map->init_slots_sz) * host_ptr_sz);
6636                         map->init_slots_sz = new_sz;
6637                 }
6638                 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
6639
6640                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
6641                          i, map->name, moff, type, name);
6642         }
6643
6644         return 0;
6645 }
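
/* Editorial example of the source-level construct handled above (a minimal
 * sketch using the usual bpf_helpers.h macros; 'inner', 'inner_a',
 * 'inner_b' and 'outer' are hypothetical names):
 *
 *	struct inner {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, int);
 *		__type(value, int);
 *	} inner_a SEC(".maps"), inner_b SEC(".maps");
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *		__uint(max_entries, 2);
 *		__array(values, struct inner);
 *	} outer SEC(".maps") = {
 *		.values = { &inner_a, &inner_b },
 *	};
 *
 * Each '&inner_X' initializer emits an ELF relocation against the .maps
 * section; the loop above resolves it to an init_slots[] entry. For
 * BPF_MAP_TYPE_PROG_ARRAY maps, .values entries reference entry-point
 * programs instead.
 */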
6646
6647 static int bpf_object__collect_relos(struct bpf_object *obj)
6648 {
6649         int i, err;
6650
6651         for (i = 0; i < obj->efile.sec_cnt; i++) {
6652                 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
6653                 Elf64_Shdr *shdr;
6654                 Elf_Data *data;
6655                 int idx;
6656
6657                 if (sec_desc->sec_type != SEC_RELO)
6658                         continue;
6659
6660                 shdr = sec_desc->shdr;
6661                 data = sec_desc->data;
6662                 idx = shdr->sh_info;
6663
6664                 if (shdr->sh_type != SHT_REL) {
6665                         pr_warn("internal error at %d\n", __LINE__);
6666                         return -LIBBPF_ERRNO__INTERNAL;
6667                 }
6668
6669                 if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx)
6670                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6671                 else if (idx == obj->efile.btf_maps_shndx)
6672                         err = bpf_object__collect_map_relos(obj, shdr, data);
6673                 else
6674                         err = bpf_object__collect_prog_relos(obj, shdr, data);
6675                 if (err)
6676                         return err;
6677         }
6678
6679         bpf_object__sort_relos(obj);
6680         return 0;
6681 }
6682
6683 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6684 {
6685         if (BPF_CLASS(insn->code) == BPF_JMP &&
6686             BPF_OP(insn->code) == BPF_CALL &&
6687             BPF_SRC(insn->code) == BPF_K &&
6688             insn->src_reg == 0 &&
6689             insn->dst_reg == 0) {
6690                 *func_id = insn->imm;
6691                 return true;
6692         }
6693         return false;
6694 }
6695
6696 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
6697 {
6698         struct bpf_insn *insn = prog->insns;
6699         enum bpf_func_id func_id;
6700         int i;
6701
6702         if (obj->gen_loader)
6703                 return 0;
6704
6705         for (i = 0; i < prog->insns_cnt; i++, insn++) {
6706                 if (!insn_is_helper_call(insn, &func_id))
6707                         continue;
6708
6709                 /* on kernels that don't yet support
6710                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6711                  * to bpf_probe_read() which works well for old kernels
6712                  */
6713                 switch (func_id) {
6714                 case BPF_FUNC_probe_read_kernel:
6715                 case BPF_FUNC_probe_read_user:
6716                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6717                                 insn->imm = BPF_FUNC_probe_read;
6718                         break;
6719                 case BPF_FUNC_probe_read_kernel_str:
6720                 case BPF_FUNC_probe_read_user_str:
6721                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6722                                 insn->imm = BPF_FUNC_probe_read_str;
6723                         break;
6724                 default:
6725                         break;
6726                 }
6727         }
6728         return 0;
6729 }
6730
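/* For example (program and section names here are illustrative, not taken
 * from this file), a program written as:
 *
 *	SEC("kprobe/do_sys_open")
 *	int probe(struct pt_regs *ctx)
 *	{
 *		char buf[16];
 *
 *		bpf_probe_read_kernel(buf, sizeof(buf), (void *)PT_REGS_PARM1(ctx));
 *		return 0;
 *	}
 *
 * will, on kernels lacking FEAT_PROBE_READ_KERN, have the call instruction's
 * imm rewritten by bpf_object__sanitize_prog() from BPF_FUNC_probe_read_kernel
 * to BPF_FUNC_probe_read, which such old kernels do support.
 */
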
6731 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
6732                                      int *btf_obj_fd, int *btf_type_id);
6733
6734 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
6735 static int libbpf_prepare_prog_load(struct bpf_program *prog,
6736                                     struct bpf_prog_load_opts *opts, long cookie)
6737 {
6738         enum sec_def_flags def = cookie;
6739
6740         /* old kernels might not support specifying expected_attach_type */
6741         if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
6742                 opts->expected_attach_type = 0;
6743
6744         if (def & SEC_SLEEPABLE)
6745                 opts->prog_flags |= BPF_F_SLEEPABLE;
6746
6747         if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
6748                 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
6749
6750         if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
6751                 int btf_obj_fd = 0, btf_type_id = 0, err;
6752                 const char *attach_name;
6753
6754                 attach_name = strchr(prog->sec_name, '/');
6755                 if (!attach_name) {
6756                         /* if a BPF program is annotated with just SEC("fentry")
6757                          * (or similar) without declaratively specifying a
6758                          * target, then the target is expected to be specified
6759                          * with bpf_program__set_attach_target() at runtime,
6760                          * before the BPF object load step. If it isn't, there
6761                          * is nothing to load into the kernel, as the BPF
6762                          * verifier won't be able to validate BPF program
6763                          * correctness anyway.
6764                          */
6765                         pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
6766                                 prog->name);
6767                         return -EINVAL;
6768                 }
6769                 attach_name++; /* skip over / */
6770
6771                 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
6772                 if (err)
6773                         return err;
6774
6775                 /* cache resolved BTF FD and BTF type ID in the prog */
6776                 prog->attach_btf_obj_fd = btf_obj_fd;
6777                 prog->attach_btf_id = btf_type_id;
6778
6779                 /* by this point libbpf's common logic no longer uses
6780                  * prog->attach_btf_obj_fd/prog->attach_btf_id, because this
6781                  * callback is invoked after opts were already populated by
6782                  * libbpf, so this callback has to update opts explicitly here
6783                  */
6784                 opts->attach_btf_obj_fd = btf_obj_fd;
6785                 opts->attach_btf_id = btf_type_id;
6786         }
6787         return 0;
6788 }
6789
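/* For example, SEC("fentry/do_unlinkat") resolves the attach target BTF ID
 * declaratively from the section name, while a bare SEC("fentry") program
 * must be given its target at runtime before load, along the lines of
 * (illustrative sketch):
 *
 *	err = bpf_program__set_attach_target(prog, 0, "do_unlinkat");
 *	if (err)
 *		goto err_out;
 *	err = bpf_object__load(obj);
 */
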
6790 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
6791
6792 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
6793                                 struct bpf_insn *insns, int insns_cnt,
6794                                 const char *license, __u32 kern_version, int *prog_fd)
6795 {
6796         LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
6797         const char *prog_name = NULL;
6798         char *cp, errmsg[STRERR_BUFSIZE];
6799         size_t log_buf_size = 0;
6800         char *log_buf = NULL, *tmp;
6801         int btf_fd, ret, err;
6802         bool own_log_buf = true;
6803         __u32 log_level = prog->log_level;
6804
6805         if (prog->type == BPF_PROG_TYPE_UNSPEC) {
6806                 /*
6807                  * The program type must be set.  Most likely we couldn't find a proper
6808                  * section definition at load time, and thus we didn't infer the type.
6809                  */
6810                 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
6811                         prog->name, prog->sec_name);
6812                 return -EINVAL;
6813         }
6814
6815         if (!insns || !insns_cnt)
6816                 return -EINVAL;
6817
6818         load_attr.expected_attach_type = prog->expected_attach_type;
6819         if (kernel_supports(obj, FEAT_PROG_NAME))
6820                 prog_name = prog->name;
6821         load_attr.attach_prog_fd = prog->attach_prog_fd;
6822         load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
6823         load_attr.attach_btf_id = prog->attach_btf_id;
6824         load_attr.kern_version = kern_version;
6825         load_attr.prog_ifindex = prog->prog_ifindex;
6826
6827         /* specify func_info/line_info only if kernel supports them */
6828         btf_fd = bpf_object__btf_fd(obj);
6829         if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
6830                 load_attr.prog_btf_fd = btf_fd;
6831                 load_attr.func_info = prog->func_info;
6832                 load_attr.func_info_rec_size = prog->func_info_rec_size;
6833                 load_attr.func_info_cnt = prog->func_info_cnt;
6834                 load_attr.line_info = prog->line_info;
6835                 load_attr.line_info_rec_size = prog->line_info_rec_size;
6836                 load_attr.line_info_cnt = prog->line_info_cnt;
6837         }
6838         load_attr.log_level = log_level;
6839         load_attr.prog_flags = prog->prog_flags;
6840         load_attr.fd_array = obj->fd_array;
6841
6842         /* adjust load_attr if sec_def provides custom preload callback */
6843         if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
6844                 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
6845                 if (err < 0) {
6846                         pr_warn("prog '%s': failed to prepare load attributes: %d\n",
6847                                 prog->name, err);
6848                         return err;
6849                 }
6850                 insns = prog->insns;
6851                 insns_cnt = prog->insns_cnt;
6852         }
6853
6854         if (obj->gen_loader) {
6855                 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
6856                                    license, insns, insns_cnt, &load_attr,
6857                                    prog - obj->programs);
6858                 *prog_fd = -1;
6859                 return 0;
6860         }
6861
6862 retry_load:
6863         /* if log_level is zero, we don't request logs initially even if
6864          * custom log_buf is specified; if the program load fails, then we'll
6865          * bump log_level to 1 and use either custom log_buf or we'll allocate
6866          * our own and retry the load to get details on what failed
6867          */
6868         if (log_level) {
6869                 if (prog->log_buf) {
6870                         log_buf = prog->log_buf;
6871                         log_buf_size = prog->log_size;
6872                         own_log_buf = false;
6873                 } else if (obj->log_buf) {
6874                         log_buf = obj->log_buf;
6875                         log_buf_size = obj->log_size;
6876                         own_log_buf = false;
6877                 } else {
6878                         log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
6879                         tmp = realloc(log_buf, log_buf_size);
6880                         if (!tmp) {
6881                                 ret = -ENOMEM;
6882                                 goto out;
6883                         }
6884                         log_buf = tmp;
6885                         log_buf[0] = '\0';
6886                         own_log_buf = true;
6887                 }
6888         }
6889
6890         load_attr.log_buf = log_buf;
6891         load_attr.log_size = log_buf_size;
6892         load_attr.log_level = log_level;
6893
6894         ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
6895         if (ret >= 0) {
6896                 if (log_level && own_log_buf) {
6897                         pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6898                                  prog->name, log_buf);
6899                 }
6900
6901                 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
6902                         struct bpf_map *map;
6903                         int i;
6904
6905                         for (i = 0; i < obj->nr_maps; i++) {
6906                                 map = &obj->maps[i];
6907                                 if (map->libbpf_type != LIBBPF_MAP_RODATA)
6908                                         continue;
6909
6910                                 if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) {
6911                                         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
6912                                         pr_warn("prog '%s': failed to bind map '%s': %s\n",
6913                                                 prog->name, map->real_name, cp);
6914                                         /* Don't fail hard if we can't bind rodata. */
6915                                 }
6916                         }
6917                 }
6918
6919                 *prog_fd = ret;
6920                 ret = 0;
6921                 goto out;
6922         }
6923
6924         if (log_level == 0) {
6925                 log_level = 1;
6926                 goto retry_load;
6927         }
6928         /* On ENOSPC, increase log buffer size and retry, unless custom
6929          * log_buf is specified.
6930          * Be careful to not overflow u32, though. Kernel's log buf size limit
6931          * isn't part of UAPI so it can always be bumped to full 4GB. So don't
6932          * Be careful not to overflow u32, though. Kernel's log buf size limit
6933          * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
6934          */
6935         if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
6936                 goto retry_load;
6937
6938         ret = -errno;
6939
6940         /* post-process verifier log to improve error descriptions */
6941         fixup_verifier_log(prog, log_buf, log_buf_size);
6942
6943         cp = libbpf_strerror_r(-ret, errmsg, sizeof(errmsg));
6944         pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
6945         pr_perm_msg(ret);
6946
6947         if (own_log_buf && log_buf && log_buf[0] != '\0') {
6948                 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
6949                         prog->name, log_buf);
6950         }
6951
6952 out:
6953         if (own_log_buf)
6954                 free(log_buf);
6955         return ret;
6956 }
6957
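/* Callers who want the verifier log without the retry dance above can supply
 * their own buffer before load; a minimal sketch (buffer size is arbitrary):
 *
 *	static char log[1024 * 1024];
 *
 *	bpf_program__set_log_buf(prog, log, sizeof(log));
 *	bpf_program__set_log_level(prog, 1);
 *
 * With a user-provided buffer, libbpf uses it as-is and won't grow it on
 * ENOSPC.
 */
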
6958 static char *find_prev_line(char *buf, char *cur)
6959 {
6960         char *p;
6961
6962         if (cur == buf) /* reached the start of the log buf */
6963                 return NULL;
6964
6965         p = cur - 1;
6966         while (p - 1 >= buf && *(p - 1) != '\n')
6967                 p--;
6968
6969         return p;
6970 }
6971
6972 static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
6973                       char *orig, size_t orig_sz, const char *patch)
6974 {
6975         /* size of the remaining log content to the right from the to-be-replaced part */
6976         size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
6977         size_t patch_sz = strlen(patch);
6978
6979         if (patch_sz != orig_sz) {
6980                 /* If the patch line(s) are longer than the original piece of the
6981                  * verifier log, shift the log contents by (patch_sz - orig_sz)
6982                  * bytes to the right, starting from after the to-be-replaced part.
6983                  *
6984                  * If the patch line(s) are shorter than the original piece of the
6985                  * verifier log, shift the log contents by (orig_sz - patch_sz)
6986                  * bytes to the left, starting from after the to-be-replaced part.
6987                  *
6988                  * We need to be careful not to overflow the available
6989                  * buf_sz capacity. If that's the case, we'll truncate the end
6990                  * of the original log, as necessary.
6991                  */
6992                 if (patch_sz > orig_sz) {
6993                         if (orig + patch_sz >= buf + buf_sz) {
6994                                 /* patch is big enough to cover remaining space completely */
6995                                 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
6996                                 rem_sz = 0;
6997                         } else if (patch_sz - orig_sz > buf_sz - log_sz) {
6998                                 /* patch causes part of remaining log to be truncated */
6999                                 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7000                         }
7001                 }
7002                 /* shift remaining log to the right by calculated amount */
7003                 memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7004         }
7005
7006         memcpy(orig, patch, patch_sz);
7007 }
7008
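/* A worked example of the shifting above (sizes illustrative): if a 20-byte
 * poisoned line is replaced by a 60-byte patch, the log tail after the
 * original line is first moved 40 bytes to the right (clamped so it never
 * runs past buf_sz, truncating the tail if needed), and then the patch is
 * copied over the now correctly-sized hole.
 */
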
7009 static void fixup_log_failed_core_relo(struct bpf_program *prog,
7010                                        char *buf, size_t buf_sz, size_t log_sz,
7011                                        char *line1, char *line2, char *line3)
7012 {
7013         /* Expected log for failed and not properly guarded CO-RE relocation:
7014          * line1 -> 123: (85) call unknown#195896080
7015          * line2 -> invalid func unknown#195896080
7016          * line3 -> <anything else or end of buffer>
7017          *
7018          * "123" is the index of the instruction that was poisoned. We extract
7019          * instruction index to find corresponding CO-RE relocation and
7020          * replace this part of the log with more relevant information about
7021          * failed CO-RE relocation.
7022          */
7023         const struct bpf_core_relo *relo;
7024         struct bpf_core_spec spec;
7025         char patch[512], spec_buf[256];
7026         int insn_idx, err, spec_len;
7027
7028         if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
7029                 return;
7030
7031         relo = find_relo_core(prog, insn_idx);
7032         if (!relo)
7033                 return;
7034
7035         err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
7036         if (err)
7037                 return;
7038
7039         spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
7040         snprintf(patch, sizeof(patch),
7041                  "%d: <invalid CO-RE relocation>\n"
7042                  "failed to resolve CO-RE relocation %s%s\n",
7043                  insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
7044
7045         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7046 }
7047
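/* After patching, that spot in the log would read roughly (the spec string is
 * illustrative; its exact shape comes from bpf_core_format_spec()):
 *
 *	123: <invalid CO-RE relocation>
 *	failed to resolve CO-RE relocation <byte_off> [7] struct task_struct.pid (0:24)
 */
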
7048 static void fixup_log_missing_map_load(struct bpf_program *prog,
7049                                        char *buf, size_t buf_sz, size_t log_sz,
7050                                        char *line1, char *line2, char *line3)
7051 {
7052         /* Expected log for failed and not properly guarded map reference:
7053          * line1 -> 123: (85) call unknown#2001000345
7054          * line2 -> invalid func unknown#2001000345
7055          * line3 -> <anything else or end of buffer>
7056          *
7057          * "123" is the index of the instruction that was poisoned.
7058          * "345" in "2001000345" is a map index in obj->maps to fetch map name.
7059          */
7060         struct bpf_object *obj = prog->obj;
7061         const struct bpf_map *map;
7062         int insn_idx, map_idx;
7063         char patch[128];
7064
7065         if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
7066                 return;
7067
7068         map_idx -= POISON_LDIMM64_MAP_BASE;
7069         if (map_idx < 0 || map_idx >= obj->nr_maps)
7070                 return;
7071         map = &obj->maps[map_idx];
7072
7073         snprintf(patch, sizeof(patch),
7074                  "%d: <invalid BPF map reference>\n"
7075                  "BPF map '%s' is referenced but wasn't created\n",
7076                  insn_idx, map->name);
7077
7078         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7079 }
7080
7081 static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
7082                                          char *buf, size_t buf_sz, size_t log_sz,
7083                                          char *line1, char *line2, char *line3)
7084 {
7085         /* Expected log for failed and not properly guarded kfunc call:
7086          * line1 -> 123: (85) call unknown#2002000345
7087          * line2 -> invalid func unknown#2002000345
7088          * line3 -> <anything else or end of buffer>
7089          *
7090          * "123" is the index of the instruction that was poisoned.
7091          * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
7092          */
7093         struct bpf_object *obj = prog->obj;
7094         const struct extern_desc *ext;
7095         int insn_idx, ext_idx;
7096         char patch[128];
7097
7098         if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
7099                 return;
7100
7101         ext_idx -= POISON_CALL_KFUNC_BASE;
7102         if (ext_idx < 0 || ext_idx >= obj->nr_extern)
7103                 return;
7104         ext = &obj->externs[ext_idx];
7105
7106         snprintf(patch, sizeof(patch),
7107                  "%d: <invalid kfunc call>\n"
7108                  "kfunc '%s' is referenced but wasn't resolved\n",
7109                  insn_idx, ext->name);
7110
7111         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7112 }
7113
7114 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
7115 {
7116         /* look for familiar error patterns in last N lines of the log */
7117         const size_t max_last_line_cnt = 10;
7118         char *prev_line, *cur_line, *next_line;
7119         size_t log_sz;
7120         int i;
7121
7122         if (!buf)
7123                 return;
7124
7125         log_sz = strlen(buf) + 1;
7126         next_line = buf + log_sz - 1;
7127
7128         for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
7129                 cur_line = find_prev_line(buf, next_line);
7130                 if (!cur_line)
7131                         return;
7132
7133                 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
7134                         prev_line = find_prev_line(buf, cur_line);
7135                         if (!prev_line)
7136                                 continue;
7137
7138                         /* failed CO-RE relocation case */
7139                         fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
7140                                                    prev_line, cur_line, next_line);
7141                         return;
7142                 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
7143                         prev_line = find_prev_line(buf, cur_line);
7144                         if (!prev_line)
7145                                 continue;
7146
7147                         /* reference to uncreated BPF map */
7148                         fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
7149                                                    prev_line, cur_line, next_line);
7150                         return;
7151                 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
7152                         prev_line = find_prev_line(buf, cur_line);
7153                         if (!prev_line)
7154                                 continue;
7155
7156                         /* reference to unresolved kfunc */
7157                         fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
7158                                                      prev_line, cur_line, next_line);
7159                         return;
7160                 }
7161         }
7162 }
7163
7164 static int bpf_program_record_relos(struct bpf_program *prog)
7165 {
7166         struct bpf_object *obj = prog->obj;
7167         int i;
7168
7169         for (i = 0; i < prog->nr_reloc; i++) {
7170                 struct reloc_desc *relo = &prog->reloc_desc[i];
7171                 struct extern_desc *ext = &obj->externs[relo->ext_idx];
7172                 int kind;
7173
7174                 switch (relo->type) {
7175                 case RELO_EXTERN_LD64:
7176                         if (ext->type != EXT_KSYM)
7177                                 continue;
7178                         kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
7179                                 BTF_KIND_VAR : BTF_KIND_FUNC;
7180                         bpf_gen__record_extern(obj->gen_loader, ext->name,
7181                                                ext->is_weak, !ext->ksym.type_id,
7182                                                true, kind, relo->insn_idx);
7183                         break;
7184                 case RELO_EXTERN_CALL:
7185                         bpf_gen__record_extern(obj->gen_loader, ext->name,
7186                                                ext->is_weak, false, false, BTF_KIND_FUNC,
7187                                                relo->insn_idx);
7188                         break;
7189                 case RELO_CORE: {
7190                         struct bpf_core_relo cr = {
7191                                 .insn_off = relo->insn_idx * 8,
7192                                 .type_id = relo->core_relo->type_id,
7193                                 .access_str_off = relo->core_relo->access_str_off,
7194                                 .kind = relo->core_relo->kind,
7195                         };
7196
7197                         bpf_gen__record_relo_core(obj->gen_loader, &cr);
7198                         break;
7199                 }
7200                 default:
7201                         continue;
7202                 }
7203         }
7204         return 0;
7205 }
7206
7207 static int
7208 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7209 {
7210         struct bpf_program *prog;
7211         size_t i;
7212         int err;
7213
7214         for (i = 0; i < obj->nr_programs; i++) {
7215                 prog = &obj->programs[i];
7216                 err = bpf_object__sanitize_prog(obj, prog);
7217                 if (err)
7218                         return err;
7219         }
7220
7221         for (i = 0; i < obj->nr_programs; i++) {
7222                 prog = &obj->programs[i];
7223                 if (prog_is_subprog(obj, prog))
7224                         continue;
7225                 if (!prog->autoload) {
7226                         pr_debug("prog '%s': skipped loading\n", prog->name);
7227                         continue;
7228                 }
7229                 prog->log_level |= log_level;
7230
7231                 if (obj->gen_loader)
7232                         bpf_program_record_relos(prog);
7233
7234                 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7235                                            obj->license, obj->kern_version, &prog->fd);
7236                 if (err) {
7237                         pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7238                         return err;
7239                 }
7240         }
7241
7242         bpf_object__free_relocs(obj);
7243         return 0;
7244 }
7245
7246 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7247
7248 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7249 {
7250         struct bpf_program *prog;
7251         int err;
7252
7253         bpf_object__for_each_program(prog, obj) {
7254                 prog->sec_def = find_sec_def(prog->sec_name);
7255                 if (!prog->sec_def) {
7256                         /* couldn't guess, but user might manually specify */
7257                         pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7258                                 prog->name, prog->sec_name);
7259                         continue;
7260                 }
7261
7262                 prog->type = prog->sec_def->prog_type;
7263                 prog->expected_attach_type = prog->sec_def->expected_attach_type;
7264
7265                 /* sec_def can have custom callback which should be called
7266                  * after bpf_program is initialized to adjust its properties
7267                  */
7268                 if (prog->sec_def->prog_setup_fn) {
7269                         err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7270                         if (err < 0) {
7271                                 pr_warn("prog '%s': failed to initialize: %d\n",
7272                                         prog->name, err);
7273                                 return err;
7274                         }
7275                 }
7276         }
7277
7278         return 0;
7279 }
7280
7281 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7282                                           const struct bpf_object_open_opts *opts)
7283 {
7284         const char *obj_name, *kconfig, *btf_tmp_path;
7285         struct bpf_object *obj;
7286         char tmp_name[64];
7287         int err;
7288         char *log_buf;
7289         size_t log_size;
7290         __u32 log_level;
7291
7292         if (elf_version(EV_CURRENT) == EV_NONE) {
7293                 pr_warn("failed to init libelf for %s\n",
7294                         path ? : "(mem buf)");
7295                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7296         }
7297
7298         if (!OPTS_VALID(opts, bpf_object_open_opts))
7299                 return ERR_PTR(-EINVAL);
7300
7301         obj_name = OPTS_GET(opts, object_name, NULL);
7302         if (obj_buf) {
7303                 if (!obj_name) {
7304                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7305                                  (unsigned long)obj_buf,
7306                                  (unsigned long)obj_buf_sz);
7307                         obj_name = tmp_name;
7308                 }
7309                 path = obj_name;
7310                 pr_debug("loading object '%s' from buffer\n", obj_name);
7311         }
7312
7313         log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7314         log_size = OPTS_GET(opts, kernel_log_size, 0);
7315         log_level = OPTS_GET(opts, kernel_log_level, 0);
7316         if (log_size > UINT_MAX)
7317                 return ERR_PTR(-EINVAL);
7318         if (log_size && !log_buf)
7319                 return ERR_PTR(-EINVAL);
7320
7321         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7322         if (IS_ERR(obj))
7323                 return obj;
7324
7325         obj->log_buf = log_buf;
7326         obj->log_size = log_size;
7327         obj->log_level = log_level;
7328
7329         btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7330         if (btf_tmp_path) {
7331                 if (strlen(btf_tmp_path) >= PATH_MAX) {
7332                         err = -ENAMETOOLONG;
7333                         goto out;
7334                 }
7335                 obj->btf_custom_path = strdup(btf_tmp_path);
7336                 if (!obj->btf_custom_path) {
7337                         err = -ENOMEM;
7338                         goto out;
7339                 }
7340         }
7341
7342         kconfig = OPTS_GET(opts, kconfig, NULL);
7343         if (kconfig) {
7344                 obj->kconfig = strdup(kconfig);
7345                 if (!obj->kconfig) {
7346                         err = -ENOMEM;
7347                         goto out;
7348                 }
7349         }
7350
7351         err = bpf_object__elf_init(obj);
7352         err = err ? : bpf_object__check_endianness(obj);
7353         err = err ? : bpf_object__elf_collect(obj);
7354         err = err ? : bpf_object__collect_externs(obj);
7355         err = err ? : bpf_object_fixup_btf(obj);
7356         err = err ? : bpf_object__init_maps(obj, opts);
7357         err = err ? : bpf_object_init_progs(obj, opts);
7358         err = err ? : bpf_object__collect_relos(obj);
7359         if (err)
7360                 goto out;
7361
7362         bpf_object__elf_finish(obj);
7363
7364         return obj;
7365 out:
7366         bpf_object__close(obj);
7367         return ERR_PTR(err);
7368 }
7369
7370 struct bpf_object *
7371 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7372 {
7373         if (!path)
7374                 return libbpf_err_ptr(-EINVAL);
7375
7376         pr_debug("loading %s\n", path);
7377
7378         return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
7379 }
7380
7381 struct bpf_object *bpf_object__open(const char *path)
7382 {
7383         return bpf_object__open_file(path, NULL);
7384 }
7385
7386 struct bpf_object *
7387 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7388                      const struct bpf_object_open_opts *opts)
7389 {
7390         if (!obj_buf || obj_buf_sz == 0)
7391                 return libbpf_err_ptr(-EINVAL);
7392
7393         return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
7394 }
7395
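/* Typical open+load sequence using the public API (illustrative sketch;
 * the object file name is arbitrary):
 *
 *	struct bpf_object *obj;
 *	int err;
 *
 *	obj = bpf_object__open_file("prog.bpf.o", NULL);
 *	if (!obj)
 *		return -errno;
 *	err = bpf_object__load(obj);
 *	if (err) {
 *		bpf_object__close(obj);
 *		return err;
 *	}
 */
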
7396 static int bpf_object_unload(struct bpf_object *obj)
7397 {
7398         size_t i;
7399
7400         if (!obj)
7401                 return libbpf_err(-EINVAL);
7402
7403         for (i = 0; i < obj->nr_maps; i++) {
7404                 zclose(obj->maps[i].fd);
7405                 if (obj->maps[i].st_ops)
7406                         zfree(&obj->maps[i].st_ops->kern_vdata);
7407         }
7408
7409         for (i = 0; i < obj->nr_programs; i++)
7410                 bpf_program__unload(&obj->programs[i]);
7411
7412         return 0;
7413 }
7414
7415 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7416 {
7417         struct bpf_map *m;
7418
7419         bpf_object__for_each_map(m, obj) {
7420                 if (!bpf_map__is_internal(m))
7421                         continue;
7422                 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7423                         m->def.map_flags &= ~BPF_F_MMAPABLE;
7424         }
7425
7426         return 0;
7427 }
7428
7429 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
7430 {
7431         char sym_type, sym_name[500];
7432         unsigned long long sym_addr;
7433         int ret, err = 0;
7434         FILE *f;
7435
7436         f = fopen("/proc/kallsyms", "r");
7437         if (!f) {
7438                 err = -errno;
7439                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7440                 return err;
7441         }
7442
7443         while (true) {
7444                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7445                              &sym_addr, &sym_type, sym_name);
7446                 if (ret == EOF && feof(f))
7447                         break;
7448                 if (ret != 3) {
7449                         pr_warn("failed to read kallsyms entry: %d\n", ret);
7450                         err = -EINVAL;
7451                         break;
7452                 }
7453
7454                 err = cb(sym_addr, sym_type, sym_name, ctx);
7455                 if (err)
7456                         break;
7457         }
7458
7459         fclose(f);
7460         return err;
7461 }
7462
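/* A minimal usage sketch with a hypothetical callback; returning non-zero
 * from the callback stops iteration and is propagated back to the caller:
 *
 *	static int print_sym(unsigned long long addr, char type,
 *			     const char *name, void *ctx)
 *	{
 *		printf("0x%llx %c %s\n", addr, type, name);
 *		return 0;
 *	}
 *
 *	err = libbpf_kallsyms_parse(print_sym, NULL);
 */
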
7463 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
7464                        const char *sym_name, void *ctx)
7465 {
7466         struct bpf_object *obj = ctx;
7467         const struct btf_type *t;
7468         struct extern_desc *ext;
7469
7470         ext = find_extern_by_name(obj, sym_name);
7471         if (!ext || ext->type != EXT_KSYM)
7472                 return 0;
7473
7474         t = btf__type_by_id(obj->btf, ext->btf_id);
7475         if (!btf_is_var(t))
7476                 return 0;
7477
7478         if (ext->is_set && ext->ksym.addr != sym_addr) {
7479                 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
7480                         sym_name, ext->ksym.addr, sym_addr);
7481                 return -EINVAL;
7482         }
7483         if (!ext->is_set) {
7484                 ext->is_set = true;
7485                 ext->ksym.addr = sym_addr;
7486                 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
7487         }
7488         return 0;
7489 }
7490
7491 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7492 {
7493         return libbpf_kallsyms_parse(kallsyms_cb, obj);
7494 }
7495
7496 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7497                             __u16 kind, struct btf **res_btf,
7498                             struct module_btf **res_mod_btf)
7499 {
7500         struct module_btf *mod_btf;
7501         struct btf *btf;
7502         int i, id, err;
7503
7504         btf = obj->btf_vmlinux;
7505         mod_btf = NULL;
7506         id = btf__find_by_name_kind(btf, ksym_name, kind);
7507
7508         if (id == -ENOENT) {
7509                 err = load_module_btfs(obj);
7510                 if (err)
7511                         return err;
7512
7513                 for (i = 0; i < obj->btf_module_cnt; i++) {
7514                         /* we assume module_btf's BTF FD is always >0 */
7515                         mod_btf = &obj->btf_modules[i];
7516                         btf = mod_btf->btf;
7517                         id = btf__find_by_name_kind_own(btf, ksym_name, kind);
7518                         if (id != -ENOENT)
7519                                 break;
7520                 }
7521         }
7522         if (id <= 0)
7523                 return -ESRCH;
7524
7525         *res_btf = btf;
7526         *res_mod_btf = mod_btf;
7527         return id;
7528 }
7529
7530 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7531                                                struct extern_desc *ext)
7532 {
7533         const struct btf_type *targ_var, *targ_type;
7534         __u32 targ_type_id, local_type_id;
7535         struct module_btf *mod_btf = NULL;
7536         const char *targ_var_name;
7537         struct btf *btf = NULL;
7538         int id, err;
7539
7540         id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
7541         if (id < 0) {
7542                 if (id == -ESRCH && ext->is_weak)
7543                         return 0;
7544                 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
7545                         ext->name);
7546                 return id;
7547         }
7548
7549         /* find local type_id */
7550         local_type_id = ext->ksym.type_id;
7551
7552         /* find target type_id */
7553         targ_var = btf__type_by_id(btf, id);
7554         targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7555         targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7556
7557         err = bpf_core_types_are_compat(obj->btf, local_type_id,
7558                                         btf, targ_type_id);
7559         if (err <= 0) {
7560                 const struct btf_type *local_type;
7561                 const char *targ_name, *local_name;
7562
7563                 local_type = btf__type_by_id(obj->btf, local_type_id);
7564                 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7565                 targ_name = btf__name_by_offset(btf, targ_type->name_off);
7566
7567                 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7568                         ext->name, local_type_id,
7569                         btf_kind_str(local_type), local_name, targ_type_id,
7570                         btf_kind_str(targ_type), targ_name);
7571                 return -EINVAL;
7572         }
7573
7574         ext->is_set = true;
7575         ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7576         ext->ksym.kernel_btf_id = id;
7577         pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7578                  ext->name, id, btf_kind_str(targ_var), targ_var_name);
7579
7580         return 0;
7581 }
7582
7583 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7584                                                 struct extern_desc *ext)
7585 {
7586         int local_func_proto_id, kfunc_proto_id, kfunc_id;
7587         struct module_btf *mod_btf = NULL;
7588         const struct btf_type *kern_func;
7589         struct btf *kern_btf = NULL;
7590         int ret;
7591
7592         local_func_proto_id = ext->ksym.type_id;
7593
7594         kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC, &kern_btf, &mod_btf);
7595         if (kfunc_id < 0) {
7596                 if (kfunc_id == -ESRCH && ext->is_weak)
7597                         return 0;
7598                 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
7599                         ext->name);
7600                 return kfunc_id;
7601         }
7602
7603         kern_func = btf__type_by_id(kern_btf, kfunc_id);
7604         kfunc_proto_id = kern_func->type;
7605
7606         ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
7607                                         kern_btf, kfunc_proto_id);
7608         if (ret <= 0) {
7609                 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
7610                         ext->name, local_func_proto_id,
7611                         mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
7612                 return -EINVAL;
7613         }
7614
7615         /* set index for module BTF fd in fd_array, if unset */
7616         if (mod_btf && !mod_btf->fd_array_idx) {
7617                 /* insn->off is s16 */
7618                 if (obj->fd_array_cnt == INT16_MAX) {
7619                         pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
7620                                 ext->name, mod_btf->fd_array_idx);
7621                         return -E2BIG;
7622                 }
7623                 /* Cannot use index 0 for module BTF fd */
7624                 if (!obj->fd_array_cnt)
7625                         obj->fd_array_cnt = 1;
7626
7627                 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
7628                                         obj->fd_array_cnt + 1);
7629                 if (ret)
7630                         return ret;
7631                 mod_btf->fd_array_idx = obj->fd_array_cnt;
7632                 /* we assume module BTF FD is always >0 */
7633                 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
7634         }
7635
7636         ext->is_set = true;
7637         ext->ksym.kernel_btf_id = kfunc_id;
7638         ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
7639         /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
7640          * populates FD into ld_imm64 insn when it's used to point to kfunc.
7641          * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
7642          * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
7643          */
7644         ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7645         pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
7646                  ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
7647
7648         return 0;
7649 }
7650
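/* The kfunc externs resolved here are declared on the BPF side along the
 * lines of (kfunc name is just an example):
 *
 *	extern struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
 *
 * The extern's prototype BTF is then checked for compatibility against the
 * BTF_KIND_FUNC found in vmlinux or module BTF.
 */
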
7651 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7652 {
7653         const struct btf_type *t;
7654         struct extern_desc *ext;
7655         int i, err;
7656
7657         for (i = 0; i < obj->nr_extern; i++) {
7658                 ext = &obj->externs[i];
7659                 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7660                         continue;
7661
7662                 if (obj->gen_loader) {
7663                         ext->is_set = true;
7664                         ext->ksym.kernel_btf_obj_fd = 0;
7665                         ext->ksym.kernel_btf_id = 0;
7666                         continue;
7667                 }
7668                 t = btf__type_by_id(obj->btf, ext->btf_id);
7669                 if (btf_is_var(t))
7670                         err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
7671                 else
7672                         err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
7673                 if (err)
7674                         return err;
7675         }
7676         return 0;
7677 }
7678
7679 static int bpf_object__resolve_externs(struct bpf_object *obj,
7680                                        const char *extra_kconfig)
7681 {
7682         bool need_config = false, need_kallsyms = false;
7683         bool need_vmlinux_btf = false;
7684         struct extern_desc *ext;
7685         void *kcfg_data = NULL;
7686         int err, i;
7687
7688         if (obj->nr_extern == 0)
7689                 return 0;
7690
7691         if (obj->kconfig_map_idx >= 0)
7692                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
7693
7694         for (i = 0; i < obj->nr_extern; i++) {
7695                 ext = &obj->externs[i];
7696
7697                 if (ext->type == EXT_KSYM) {
7698                         if (ext->ksym.type_id)
7699                                 need_vmlinux_btf = true;
7700                         else
7701                                 need_kallsyms = true;
7702                         continue;
7703                 } else if (ext->type == EXT_KCFG) {
7704                         void *ext_ptr = kcfg_data + ext->kcfg.data_off;
7705                         __u64 value = 0;
7706
7707                         /* Kconfig externs need actual /proc/config.gz */
7708                         if (str_has_pfx(ext->name, "CONFIG_")) {
7709                                 need_config = true;
7710                                 continue;
7711                         }
7712
7713                         /* Virtual kcfg externs are handled specially by libbpf */
7714                         if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
7715                                 value = get_kernel_version();
7716                                 if (!value) {
7717                                         pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
7718                                         return -EINVAL;
7719                                 }
7720                         } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
7721                                 value = kernel_supports(obj, FEAT_BPF_COOKIE);
7722                         } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
7723                                 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
7724                         } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
7725                                 /* Currently libbpf supports only CONFIG_- and LINUX_-prefixed
7726                                  * __kconfig externs, where the LINUX_ ones are virtual and
7727                                  * filled out by libbpf itself (their values don't come from
7728                                  * Kconfig). If a LINUX_xxx variable is not recognized by
7729                                  * libbpf, but is marked __weak, it defaults to a zero value,
7730                                  * just like CONFIG_xxx externs.
7731                                  */
7732                                 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
7733                                 return -EINVAL;
7734                         }
7735
7736                         err = set_kcfg_value_num(ext, ext_ptr, value);
7737                         if (err)
7738                                 return err;
7739                         pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
7740                                  ext->name, (long long)value);
7741                 } else {
7742                         pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
7743                         return -EINVAL;
7744                 }
7745         }
7746         if (need_config && extra_kconfig) {
7747                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
7748                 if (err)
7749                         return -EINVAL;
7750                 need_config = false;
7751                 for (i = 0; i < obj->nr_extern; i++) {
7752                         ext = &obj->externs[i];
7753                         if (ext->type == EXT_KCFG && !ext->is_set) {
7754                                 need_config = true;
7755                                 break;
7756                         }
7757                 }
7758         }
7759         if (need_config) {
7760                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
7761                 if (err)
7762                         return -EINVAL;
7763         }
7764         if (need_kallsyms) {
7765                 err = bpf_object__read_kallsyms_file(obj);
7766                 if (err)
7767                         return -EINVAL;
7768         }
7769         if (need_vmlinux_btf) {
7770                 err = bpf_object__resolve_ksyms_btf_id(obj);
7771                 if (err)
7772                         return -EINVAL;
7773         }
7774         for (i = 0; i < obj->nr_extern; i++) {
7775                 ext = &obj->externs[i];
7776
7777                 if (!ext->is_set && !ext->is_weak) {
7778                         pr_warn("extern '%s' (strong): not resolved\n", ext->name);
7779                         return -ESRCH;
7780                 } else if (!ext->is_set) {
7781                         pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
7782                                  ext->name);
7783                 }
7784         }
7785
7786         return 0;
7787 }
7788
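/* On the BPF side, the externs resolved by this function are declared along
 * these lines (declarations are illustrative):
 *
 *	extern unsigned long CONFIG_HZ __kconfig;
 *	extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *	extern const void bpf_prog_active __ksym;
 *
 * CONFIG_* values are read from Kconfig, LINUX_* ones are filled out by
 * libbpf itself, and __ksym externs go through kallsyms and/or kernel BTF.
 */
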
7789 static void bpf_map_prepare_vdata(const struct bpf_map *map)
7790 {
7791         struct bpf_struct_ops *st_ops;
7792         __u32 i;
7793
7794         st_ops = map->st_ops;
7795         for (i = 0; i < btf_vlen(st_ops->type); i++) {
7796                 struct bpf_program *prog = st_ops->progs[i];
7797                 void *kern_data;
7798                 int prog_fd;
7799
7800                 if (!prog)
7801                         continue;
7802
7803                 prog_fd = bpf_program__fd(prog);
7804                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
7805                 *(unsigned long *)kern_data = prog_fd;
7806         }
7807 }
7808
7809 static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
7810 {
7811         int i;
7812
7813         for (i = 0; i < obj->nr_maps; i++)
7814                 if (bpf_map__is_struct_ops(&obj->maps[i]))
7815                         bpf_map_prepare_vdata(&obj->maps[i]);
7816
7817         return 0;
7818 }
7819
7820 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
7821 {
7822         int err, i;
7823
7824         if (!obj)
7825                 return libbpf_err(-EINVAL);
7826
7827         if (obj->loaded) {
7828                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
7829                 return libbpf_err(-EINVAL);
7830         }
7831
7832         if (obj->gen_loader)
7833                 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
7834
7835         err = bpf_object__probe_loading(obj);
7836         err = err ? : bpf_object__load_vmlinux_btf(obj, false);
7837         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
7838         err = err ? : bpf_object__sanitize_and_load_btf(obj);
7839         err = err ? : bpf_object__sanitize_maps(obj);
7840         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
7841         err = err ? : bpf_object__create_maps(obj);
7842         err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
7843         err = err ? : bpf_object__load_progs(obj, extra_log_level);
7844         err = err ? : bpf_object_init_prog_arrays(obj);
7845         err = err ? : bpf_object_prepare_struct_ops(obj);
7846
7847         if (obj->gen_loader) {
7848                 /* reset FDs */
7849                 if (obj->btf)
7850                         btf__set_fd(obj->btf, -1);
7851                 for (i = 0; i < obj->nr_maps; i++)
7852                         obj->maps[i].fd = -1;
7853                 if (!err)
7854                         err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
7855         }
7856
7857         /* clean up fd_array */
7858         zfree(&obj->fd_array);
7859
7860         /* clean up module BTFs */
7861         for (i = 0; i < obj->btf_module_cnt; i++) {
7862                 close(obj->btf_modules[i].fd);
7863                 btf__free(obj->btf_modules[i].btf);
7864                 free(obj->btf_modules[i].name);
7865         }
7866         free(obj->btf_modules);
7867
7868         /* clean up vmlinux BTF */
7869         btf__free(obj->btf_vmlinux);
7870         obj->btf_vmlinux = NULL;
7871
7872         obj->loaded = true; /* set regardless of whether load succeeded */
7873
7874         if (err)
7875                 goto out;
7876
7877         return 0;
7878 out:
7879         /* unpin any maps that were auto-pinned during load */
7880         for (i = 0; i < obj->nr_maps; i++)
7881                 if (obj->maps[i].pinned && !obj->maps[i].reused)
7882                         bpf_map__unpin(&obj->maps[i], NULL);
7883
7884         bpf_object_unload(obj);
7885         pr_warn("failed to load object '%s'\n", obj->path);
7886         return libbpf_err(err);
7887 }
7888
7889 int bpf_object__load(struct bpf_object *obj)
7890 {
7891         return bpf_object_load(obj, 0, NULL);
7892 }
7893
7894 static int make_parent_dir(const char *path)
7895 {
7896         char *cp, errmsg[STRERR_BUFSIZE];
7897         char *dname, *dir;
7898         int err = 0;
7899
7900         dname = strdup(path);
7901         if (dname == NULL)
7902                 return -ENOMEM;
7903
7904         dir = dirname(dname);
7905         if (mkdir(dir, 0700) && errno != EEXIST)
7906                 err = -errno;
7907
7908         free(dname);
7909         if (err) {
7910                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
7911                 pr_warn("failed to mkdir %s: %s\n", path, cp);
7912         }
7913         return err;
7914 }
7915
7916 static int check_path(const char *path)
7917 {
7918         char *cp, errmsg[STRERR_BUFSIZE];
7919         struct statfs st_fs;
7920         char *dname, *dir;
7921         int err = 0;
7922
7923         if (path == NULL)
7924                 return -EINVAL;
7925
7926         dname = strdup(path);
7927         if (dname == NULL)
7928                 return -ENOMEM;
7929
7930         dir = dirname(dname);
7931         if (statfs(dir, &st_fs)) {
7932                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7933                 pr_warn("failed to statfs %s: %s\n", dir, cp);
7934                 err = -errno;
7935         }
7936         free(dname);
7937
7938         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
7939                 pr_warn("specified path %s is not on BPF FS\n", path);
7940                 err = -EINVAL;
7941         }
7942
7943         return err;
7944 }
7945
7946 int bpf_program__pin(struct bpf_program *prog, const char *path)
7947 {
7948         char *cp, errmsg[STRERR_BUFSIZE];
7949         int err;
7950
7951         if (prog->fd < 0) {
7952                 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
7953                 return libbpf_err(-EINVAL);
7954         }
7955
7956         err = make_parent_dir(path);
7957         if (err)
7958                 return libbpf_err(err);
7959
7960         err = check_path(path);
7961         if (err)
7962                 return libbpf_err(err);
7963
7964         if (bpf_obj_pin(prog->fd, path)) {
7965                 err = -errno;
7966                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
7967                 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
7968                 return libbpf_err(err);
7969         }
7970
7971         pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
7972         return 0;
7973 }
7974
7975 int bpf_program__unpin(struct bpf_program *prog, const char *path)
7976 {
7977         int err;
7978
7979         if (prog->fd < 0) {
7980                 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
7981                 return libbpf_err(-EINVAL);
7982         }
7983
7984         err = check_path(path);
7985         if (err)
7986                 return libbpf_err(err);
7987
7988         err = unlink(path);
7989         if (err)
7990                 return libbpf_err(-errno);
7991
7992         pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
7993         return 0;
7994 }
7995
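/* Illustrative pin/unpin round-trip (the pin name is arbitrary, but the path
 * must be on a bpffs mount, as check_path() above enforces):
 *
 *	err = bpf_program__pin(prog, "/sys/fs/bpf/my_prog");
 *	...
 *	err = bpf_program__unpin(prog, "/sys/fs/bpf/my_prog");
 */
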
7996 int bpf_map__pin(struct bpf_map *map, const char *path)
7997 {
7998         char *cp, errmsg[STRERR_BUFSIZE];
7999         int err;
8000
8001         if (map == NULL) {
8002                 pr_warn("invalid map pointer\n");
8003                 return libbpf_err(-EINVAL);
8004         }
8005
8006         if (map->pin_path) {
8007                 if (path && strcmp(path, map->pin_path)) {
8008                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8009                                 bpf_map__name(map), map->pin_path, path);
8010                         return libbpf_err(-EINVAL);
8011                 } else if (map->pinned) {
8012                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8013                                  bpf_map__name(map), map->pin_path);
8014                         return 0;
8015                 }
8016         } else {
8017                 if (!path) {
8018                         pr_warn("missing a path to pin map '%s' at\n",
8019                                 bpf_map__name(map));
8020                         return libbpf_err(-EINVAL);
8021                 } else if (map->pinned) {
8022                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8023                         return libbpf_err(-EEXIST);
8024                 }
8025
8026                 map->pin_path = strdup(path);
8027                 if (!map->pin_path) {
8028                         err = -errno;
8029                         goto out_err;
8030                 }
8031         }
8032
8033         err = make_parent_dir(map->pin_path);
8034         if (err)
8035                 return libbpf_err(err);
8036
8037         err = check_path(map->pin_path);
8038         if (err)
8039                 return libbpf_err(err);
8040
8041         if (bpf_obj_pin(map->fd, map->pin_path)) {
8042                 err = -errno;
8043                 goto out_err;
8044         }
8045
8046         map->pinned = true;
8047         pr_debug("pinned map '%s'\n", map->pin_path);
8048
8049         return 0;
8050
8051 out_err:
8052         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8053         pr_warn("failed to pin map: %s\n", cp);
8054         return libbpf_err(err);
8055 }
8056
8057 int bpf_map__unpin(struct bpf_map *map, const char *path)
8058 {
8059         int err;
8060
8061         if (map == NULL) {
8062                 pr_warn("invalid map pointer\n");
8063                 return libbpf_err(-EINVAL);
8064         }
8065
8066         if (map->pin_path) {
8067                 if (path && strcmp(path, map->pin_path)) {
8068                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8069                                 bpf_map__name(map), map->pin_path, path);
8070                         return libbpf_err(-EINVAL);
8071                 }
8072                 path = map->pin_path;
8073         } else if (!path) {
8074                 pr_warn("no path to unpin map '%s' from\n",
8075                         bpf_map__name(map));
8076                 return libbpf_err(-EINVAL);
8077         }
8078
8079         err = check_path(path);
8080         if (err)
8081                 return libbpf_err(err);
8082
8083         err = unlink(path);
8084         if (err != 0)
8085                 return libbpf_err(-errno);
8086
8087         map->pinned = false;
8088         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8089
8090         return 0;
8091 }
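
/* Usage sketch (illustrative names/paths): pin a map at an explicit bpffs
 * location, then unpin it. Once a pin path is recorded on the map, NULL
 * can be passed to bpf_map__unpin():
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "my_map");
 *
 *	if (map && !bpf_map__pin(map, "/sys/fs/bpf/my_map"))
 *		bpf_map__unpin(map, NULL);
 */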
8092
8093 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8094 {
8095         char *new = NULL;
8096
8097         if (path) {
8098                 new = strdup(path);
8099                 if (!new)
8100                         return libbpf_err(-errno);
8101         }
8102
8103         free(map->pin_path);
8104         map->pin_path = new;
8105         return 0;
8106 }
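
/* Sketch: setting a pin path before bpf_object__load() makes libbpf reuse
 * a compatible map already pinned at that path, or pin the newly created
 * one there ("/sys/fs/bpf/my_map" is illustrative):
 *
 *	bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
 *	bpf_object__load(obj);
 */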
8107
8108 __alias(bpf_map__pin_path)
8109 const char *bpf_map__get_pin_path(const struct bpf_map *map);
8110
8111 const char *bpf_map__pin_path(const struct bpf_map *map)
8112 {
8113         return map->pin_path;
8114 }
8115
8116 bool bpf_map__is_pinned(const struct bpf_map *map)
8117 {
8118         return map->pinned;
8119 }
8120
8121 static void sanitize_pin_path(char *s)
8122 {
8123         /* bpffs disallows periods in path names */
8124         while (*s) {
8125                 if (*s == '.')
8126                         *s = '_';
8127                 s++;
8128         }
8129 }
8130
8131 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8132 {
8133         struct bpf_map *map;
8134         int err;
8135
8136         if (!obj)
8137                 return libbpf_err(-ENOENT);
8138
8139         if (!obj->loaded) {
8140                 pr_warn("object not yet loaded; load it first\n");
8141                 return libbpf_err(-ENOENT);
8142         }
8143
8144         bpf_object__for_each_map(map, obj) {
8145                 char *pin_path = NULL;
8146                 char buf[PATH_MAX];
8147
8148                 if (!map->autocreate)
8149                         continue;
8150
8151                 if (path) {
8152                         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8153                         if (err)
8154                                 goto err_unpin_maps;
8155                         sanitize_pin_path(buf);
8156                         pin_path = buf;
8157                 } else if (!map->pin_path) {
8158                         continue;
8159                 }
8160
8161                 err = bpf_map__pin(map, pin_path);
8162                 if (err)
8163                         goto err_unpin_maps;
8164         }
8165
8166         return 0;
8167
8168 err_unpin_maps:
8169         while ((map = bpf_object__prev_map(obj, map))) {
8170                 if (!map->pin_path)
8171                         continue;
8172
8173                 bpf_map__unpin(map, NULL);
8174         }
8175
8176         return libbpf_err(err);
8177 }
8178
8179 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8180 {
8181         struct bpf_map *map;
8182         int err;
8183
8184         if (!obj)
8185                 return libbpf_err(-ENOENT);
8186
8187         bpf_object__for_each_map(map, obj) {
8188                 char *pin_path = NULL;
8189                 char buf[PATH_MAX];
8190
8191                 if (path) {
8192                         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8193                         if (err)
8194                                 return libbpf_err(err);
8195                         sanitize_pin_path(buf);
8196                         pin_path = buf;
8197                 } else if (!map->pin_path) {
8198                         continue;
8199                 }
8200
8201                 err = bpf_map__unpin(map, pin_path);
8202                 if (err)
8203                         return libbpf_err(err);
8204         }
8205
8206         return 0;
8207 }
8208
8209 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8210 {
8211         struct bpf_program *prog;
8212         char buf[PATH_MAX];
8213         int err;
8214
8215         if (!obj)
8216                 return libbpf_err(-ENOENT);
8217
8218         if (!obj->loaded) {
8219                 pr_warn("object not yet loaded; load it first\n");
8220                 return libbpf_err(-ENOENT);
8221         }
8222
8223         bpf_object__for_each_program(prog, obj) {
8224                 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8225                 if (err)
8226                         goto err_unpin_programs;
8227
8228                 err = bpf_program__pin(prog, buf);
8229                 if (err)
8230                         goto err_unpin_programs;
8231         }
8232
8233         return 0;
8234
8235 err_unpin_programs:
8236         while ((prog = bpf_object__prev_program(obj, prog))) {
8237                 if (pathname_concat(buf, sizeof(buf), path, prog->name))
8238                         continue;
8239
8240                 bpf_program__unpin(prog, buf);
8241         }
8242
8243         return libbpf_err(err);
8244 }
8245
8246 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8247 {
8248         struct bpf_program *prog;
8249         int err;
8250
8251         if (!obj)
8252                 return libbpf_err(-ENOENT);
8253
8254         bpf_object__for_each_program(prog, obj) {
8255                 char buf[PATH_MAX];
8256
8257                 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8258                 if (err)
8259                         return libbpf_err(err);
8260
8261                 err = bpf_program__unpin(prog, buf);
8262                 if (err)
8263                         return libbpf_err(err);
8264         }
8265
8266         return 0;
8267 }
8268
8269 int bpf_object__pin(struct bpf_object *obj, const char *path)
8270 {
8271         int err;
8272
8273         err = bpf_object__pin_maps(obj, path);
8274         if (err)
8275                 return libbpf_err(err);
8276
8277         err = bpf_object__pin_programs(obj, path);
8278         if (err) {
8279                 bpf_object__unpin_maps(obj, path);
8280                 return libbpf_err(err);
8281         }
8282
8283         return 0;
8284 }
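
/* Usage sketch (path illustrative): pin everything under one directory;
 * maps land at <path>/<map_name> (with '.' sanitized to '_') and programs
 * at <path>/<prog_name>:
 *
 *	if (!bpf_object__load(obj) &&
 *	    !bpf_object__pin(obj, "/sys/fs/bpf/myapp")) {
 *		...
 *		bpf_object__unpin_programs(obj, "/sys/fs/bpf/myapp");
 *		bpf_object__unpin_maps(obj, "/sys/fs/bpf/myapp");
 *	}
 */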
8285
8286 static void bpf_map__destroy(struct bpf_map *map)
8287 {
8288         if (map->inner_map) {
8289                 bpf_map__destroy(map->inner_map);
8290                 zfree(&map->inner_map);
8291         }
8292
8293         zfree(&map->init_slots);
8294         map->init_slots_sz = 0;
8295
8296         if (map->mmaped) {
8297                 munmap(map->mmaped, bpf_map_mmap_sz(map));
8298                 map->mmaped = NULL;
8299         }
8300
8301         if (map->st_ops) {
8302                 zfree(&map->st_ops->data);
8303                 zfree(&map->st_ops->progs);
8304                 zfree(&map->st_ops->kern_func_off);
8305                 zfree(&map->st_ops);
8306         }
8307
8308         zfree(&map->name);
8309         zfree(&map->real_name);
8310         zfree(&map->pin_path);
8311
8312         if (map->fd >= 0)
8313                 zclose(map->fd);
8314 }
8315
8316 void bpf_object__close(struct bpf_object *obj)
8317 {
8318         size_t i;
8319
8320         if (IS_ERR_OR_NULL(obj))
8321                 return;
8322
8323         usdt_manager_free(obj->usdt_man);
8324         obj->usdt_man = NULL;
8325
8326         bpf_gen__free(obj->gen_loader);
8327         bpf_object__elf_finish(obj);
8328         bpf_object_unload(obj);
8329         btf__free(obj->btf);
8330         btf_ext__free(obj->btf_ext);
8331
8332         for (i = 0; i < obj->nr_maps; i++)
8333                 bpf_map__destroy(&obj->maps[i]);
8334
8335         zfree(&obj->btf_custom_path);
8336         zfree(&obj->kconfig);
8337         zfree(&obj->externs);
8338         obj->nr_extern = 0;
8339
8340         zfree(&obj->maps);
8341         obj->nr_maps = 0;
8342
8343         if (obj->programs && obj->nr_programs) {
8344                 for (i = 0; i < obj->nr_programs; i++)
8345                         bpf_program__exit(&obj->programs[i]);
8346         }
8347         zfree(&obj->programs);
8348
8349         free(obj);
8350 }
8351
8352 const char *bpf_object__name(const struct bpf_object *obj)
8353 {
8354         return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8355 }
8356
8357 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8358 {
8359         return obj ? obj->kern_version : 0;
8360 }
8361
8362 struct btf *bpf_object__btf(const struct bpf_object *obj)
8363 {
8364         return obj ? obj->btf : NULL;
8365 }
8366
8367 int bpf_object__btf_fd(const struct bpf_object *obj)
8368 {
8369         return obj->btf ? btf__fd(obj->btf) : -1;
8370 }
8371
8372 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
8373 {
8374         if (obj->loaded)
8375                 return libbpf_err(-EINVAL);
8376
8377         obj->kern_version = kern_version;
8378
8379         return 0;
8380 }
8381
8382 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8383 {
8384         struct bpf_gen *gen;
8385
8386         if (!opts)
8387                 return -EFAULT;
8388         if (!OPTS_VALID(opts, gen_loader_opts))
8389                 return -EINVAL;
8390         gen = calloc(1, sizeof(*gen));
8391         if (!gen)
8392                 return -ENOMEM;
8393         gen->opts = opts;
8394         obj->gen_loader = gen;
8395         return 0;
8396 }
8397
8398 static struct bpf_program *
8399 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8400                     bool forward)
8401 {
8402         size_t nr_programs = obj->nr_programs;
8403         ssize_t idx;
8404
8405         if (!nr_programs)
8406                 return NULL;
8407
8408         if (!p)
8409                 /* Iterate from the beginning */
8410                 return forward ? &obj->programs[0] :
8411                         &obj->programs[nr_programs - 1];
8412
8413         if (p->obj != obj) {
8414                 pr_warn("error: program handle doesn't match the object\n");
8415                 return errno = EINVAL, NULL;
8416         }
8417
8418         idx = (p - obj->programs) + (forward ? 1 : -1);
8419         if (idx >= obj->nr_programs || idx < 0)
8420                 return NULL;
8421         return &obj->programs[idx];
8422 }
8423
8424 struct bpf_program *
8425 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
8426 {
8427         struct bpf_program *prog = prev;
8428
8429         do {
8430                 prog = __bpf_program__iter(prog, obj, true);
8431         } while (prog && prog_is_subprog(obj, prog));
8432
8433         return prog;
8434 }
8435
8436 struct bpf_program *
8437 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
8438 {
8439         struct bpf_program *prog = next;
8440
8441         do {
8442                 prog = __bpf_program__iter(prog, obj, false);
8443         } while (prog && prog_is_subprog(obj, prog));
8444
8445         return prog;
8446 }
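
/* Sketch: both iterators skip subprograms, so the usual way to visit every
 * entry program is the bpf_object__for_each_program() macro from libbpf.h,
 * which wraps bpf_object__next_program():
 *
 *	struct bpf_program *prog;
 *
 *	bpf_object__for_each_program(prog, obj)
 *		printf("%s (SEC %s)\n", bpf_program__name(prog),
 *		       bpf_program__section_name(prog));
 */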
8447
8448 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8449 {
8450         prog->prog_ifindex = ifindex;
8451 }
8452
8453 const char *bpf_program__name(const struct bpf_program *prog)
8454 {
8455         return prog->name;
8456 }
8457
8458 const char *bpf_program__section_name(const struct bpf_program *prog)
8459 {
8460         return prog->sec_name;
8461 }
8462
8463 bool bpf_program__autoload(const struct bpf_program *prog)
8464 {
8465         return prog->autoload;
8466 }
8467
8468 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8469 {
8470         if (prog->obj->loaded)
8471                 return libbpf_err(-EINVAL);
8472
8473         prog->autoload = autoload;
8474         return 0;
8475 }
8476
8477 bool bpf_program__autoattach(const struct bpf_program *prog)
8478 {
8479         return prog->autoattach;
8480 }
8481
8482 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
8483 {
8484         prog->autoattach = autoattach;
8485 }
8486
8487 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
8488 {
8489         return prog->insns;
8490 }
8491
8492 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
8493 {
8494         return prog->insns_cnt;
8495 }
8496
8497 int bpf_program__set_insns(struct bpf_program *prog,
8498                            struct bpf_insn *new_insns, size_t new_insn_cnt)
8499 {
8500         struct bpf_insn *insns;
8501
8502         if (prog->obj->loaded)
8503                 return -EBUSY;
8504
8505         insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
8506         if (!insns) {
8507                 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
8508                 return -ENOMEM;
8509         }
8510         memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
8511
8512         prog->insns = insns;
8513         prog->insns_cnt = new_insn_cnt;
8514         return 0;
8515 }
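
/* Sketch (advanced API, easy to break a program with): replace a
 * not-yet-loaded program's body with a bare "return 0". The insn macros
 * are assumed to come from the tools' linux/filter.h:
 *
 *	struct bpf_insn ret0[] = {
 *		BPF_MOV64_IMM(BPF_REG_0, 0),
 *		BPF_EXIT_INSN(),
 *	};
 *
 *	bpf_program__set_insns(prog, ret0, ARRAY_SIZE(ret0));
 */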
8516
8517 int bpf_program__fd(const struct bpf_program *prog)
8518 {
8519         if (!prog)
8520                 return libbpf_err(-EINVAL);
8521
8522         if (prog->fd < 0)
8523                 return libbpf_err(-ENOENT);
8524
8525         return prog->fd;
8526 }
8527
8528 __alias(bpf_program__type)
8529 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
8530
8531 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
8532 {
8533         return prog->type;
8534 }
8535
8536 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
8537 {
8538         if (prog->obj->loaded)
8539                 return libbpf_err(-EBUSY);
8540
8541         prog->type = type;
8542         prog->sec_def = NULL;
8543         return 0;
8544 }
8545
8546 __alias(bpf_program__expected_attach_type)
8547 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
8548
8549 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
8550 {
8551         return prog->expected_attach_type;
8552 }
8553
8554 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
8555                                            enum bpf_attach_type type)
8556 {
8557         if (prog->obj->loaded)
8558                 return libbpf_err(-EBUSY);
8559
8560         prog->expected_attach_type = type;
8561         return 0;
8562 }
8563
8564 __u32 bpf_program__flags(const struct bpf_program *prog)
8565 {
8566         return prog->prog_flags;
8567 }
8568
8569 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
8570 {
8571         if (prog->obj->loaded)
8572                 return libbpf_err(-EBUSY);
8573
8574         prog->prog_flags = flags;
8575         return 0;
8576 }
8577
8578 __u32 bpf_program__log_level(const struct bpf_program *prog)
8579 {
8580         return prog->log_level;
8581 }
8582
8583 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
8584 {
8585         if (prog->obj->loaded)
8586                 return libbpf_err(-EBUSY);
8587
8588         prog->log_level = log_level;
8589         return 0;
8590 }
8591
8592 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
8593 {
8594         *log_size = prog->log_size;
8595         return prog->log_buf;
8596 }
8597
8598 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
8599 {
8600         if (log_size && !log_buf)
8601                 return -EINVAL;
8602         if (log_size > UINT_MAX)
8603                 return -EINVAL;
8604         if (prog->obj->loaded)
8605                 return -EBUSY;
8606
8607         prog->log_buf = log_buf;
8608         prog->log_size = log_size;
8609         return 0;
8610 }
8611
8612 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {                        \
8613         .sec = (char *)sec_pfx,                                             \
8614         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
8615         .expected_attach_type = atype,                                      \
8616         .cookie = (long)(flags),                                            \
8617         .prog_prepare_load_fn = libbpf_prepare_prog_load,                   \
8618         __VA_ARGS__                                                         \
8619 }
8620
8621 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8622 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8623 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8624 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8625 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8626 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8627 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8628 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8629 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8630 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8631
8632 static const struct bpf_sec_def section_defs[] = {
8633         SEC_DEF("socket",               SOCKET_FILTER, 0, SEC_NONE),
8634         SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
8635         SEC_DEF("sk_reuseport",         SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
8636         SEC_DEF("kprobe+",              KPROBE, 0, SEC_NONE, attach_kprobe),
8637         SEC_DEF("uprobe+",              KPROBE, 0, SEC_NONE, attach_uprobe),
8638         SEC_DEF("uprobe.s+",            KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8639         SEC_DEF("kretprobe+",           KPROBE, 0, SEC_NONE, attach_kprobe),
8640         SEC_DEF("uretprobe+",           KPROBE, 0, SEC_NONE, attach_uprobe),
8641         SEC_DEF("uretprobe.s+",         KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8642         SEC_DEF("kprobe.multi+",        KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8643         SEC_DEF("kretprobe.multi+",     KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8644         SEC_DEF("ksyscall+",            KPROBE, 0, SEC_NONE, attach_ksyscall),
8645         SEC_DEF("kretsyscall+",         KPROBE, 0, SEC_NONE, attach_ksyscall),
8646         SEC_DEF("usdt+",                KPROBE, 0, SEC_NONE, attach_usdt),
8647         SEC_DEF("tc",                   SCHED_CLS, 0, SEC_NONE),
8648         SEC_DEF("classifier",           SCHED_CLS, 0, SEC_NONE),
8649         SEC_DEF("action",               SCHED_ACT, 0, SEC_NONE),
8650         SEC_DEF("tracepoint+",          TRACEPOINT, 0, SEC_NONE, attach_tp),
8651         SEC_DEF("tp+",                  TRACEPOINT, 0, SEC_NONE, attach_tp),
8652         SEC_DEF("raw_tracepoint+",      RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8653         SEC_DEF("raw_tp+",              RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8654         SEC_DEF("raw_tracepoint.w+",    RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8655         SEC_DEF("raw_tp.w+",            RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8656         SEC_DEF("tp_btf+",              TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
8657         SEC_DEF("fentry+",              TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
8658         SEC_DEF("fmod_ret+",            TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
8659         SEC_DEF("fexit+",               TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
8660         SEC_DEF("fentry.s+",            TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8661         SEC_DEF("fmod_ret.s+",          TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8662         SEC_DEF("fexit.s+",             TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8663         SEC_DEF("freplace+",            EXT, 0, SEC_ATTACH_BTF, attach_trace),
8664         SEC_DEF("lsm+",                 LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
8665         SEC_DEF("lsm.s+",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
8666         SEC_DEF("lsm_cgroup+",          LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
8667         SEC_DEF("iter+",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
8668         SEC_DEF("iter.s+",              TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
8669         SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
8670         SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
8671         SEC_DEF("xdp/devmap",           XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
8672         SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
8673         SEC_DEF("xdp/cpumap",           XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
8674         SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
8675         SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
8676         SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE),
8677         SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE),
8678         SEC_DEF("lwt_out",              LWT_OUT, 0, SEC_NONE),
8679         SEC_DEF("lwt_xmit",             LWT_XMIT, 0, SEC_NONE),
8680         SEC_DEF("lwt_seg6local",        LWT_SEG6LOCAL, 0, SEC_NONE),
8681         SEC_DEF("sockops",              SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
8682         SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
8683         SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
8684         SEC_DEF("sk_skb",               SK_SKB, 0, SEC_NONE),
8685         SEC_DEF("sk_msg",               SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
8686         SEC_DEF("lirc_mode2",           LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
8687         SEC_DEF("flow_dissector",       FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
8688         SEC_DEF("cgroup_skb/ingress",   CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
8689         SEC_DEF("cgroup_skb/egress",    CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
8690         SEC_DEF("cgroup/skb",           CGROUP_SKB, 0, SEC_NONE),
8691         SEC_DEF("cgroup/sock_create",   CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
8692         SEC_DEF("cgroup/sock_release",  CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
8693         SEC_DEF("cgroup/sock",          CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
8694         SEC_DEF("cgroup/post_bind4",    CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
8695         SEC_DEF("cgroup/post_bind6",    CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
8696         SEC_DEF("cgroup/bind4",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
8697         SEC_DEF("cgroup/bind6",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
8698         SEC_DEF("cgroup/connect4",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
8699         SEC_DEF("cgroup/connect6",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
8700         SEC_DEF("cgroup/sendmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
8701         SEC_DEF("cgroup/sendmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
8702         SEC_DEF("cgroup/recvmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
8703         SEC_DEF("cgroup/recvmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
8704         SEC_DEF("cgroup/getpeername4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
8705         SEC_DEF("cgroup/getpeername6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
8706         SEC_DEF("cgroup/getsockname4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
8707         SEC_DEF("cgroup/getsockname6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
8708         SEC_DEF("cgroup/sysctl",        CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
8709         SEC_DEF("cgroup/getsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
8710         SEC_DEF("cgroup/setsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
8711         SEC_DEF("cgroup/dev",           CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
8712         SEC_DEF("struct_ops+",          STRUCT_OPS, 0, SEC_NONE),
8713         SEC_DEF("struct_ops.s+",        STRUCT_OPS, 0, SEC_SLEEPABLE),
8714         SEC_DEF("sk_lookup",            SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
8715         SEC_DEF("netfilter",            NETFILTER, 0, SEC_NONE),
8716 };
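
/* A BPF-side program selects one of the definitions above through its ELF
 * section name, e.g. (BPF C, illustrative):
 *
 *	SEC("xdp")
 *	int xdp_pass(struct xdp_md *ctx)
 *	{
 *		return XDP_PASS;
 *	}
 */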
8717
8718 static size_t custom_sec_def_cnt;
8719 static struct bpf_sec_def *custom_sec_defs;
8720 static struct bpf_sec_def custom_fallback_def;
8721 static bool has_custom_fallback_def;
8722
8723 static int last_custom_sec_def_handler_id;
8724
8725 int libbpf_register_prog_handler(const char *sec,
8726                                  enum bpf_prog_type prog_type,
8727                                  enum bpf_attach_type exp_attach_type,
8728                                  const struct libbpf_prog_handler_opts *opts)
8729 {
8730         struct bpf_sec_def *sec_def;
8731
8732         if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
8733                 return libbpf_err(-EINVAL);
8734
8735         if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
8736                 return libbpf_err(-E2BIG);
8737
8738         if (sec) {
8739                 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
8740                                               sizeof(*sec_def));
8741                 if (!sec_def)
8742                         return libbpf_err(-ENOMEM);
8743
8744                 custom_sec_defs = sec_def;
8745                 sec_def = &custom_sec_defs[custom_sec_def_cnt];
8746         } else {
8747                 if (has_custom_fallback_def)
8748                         return libbpf_err(-EBUSY);
8749
8750                 sec_def = &custom_fallback_def;
8751         }
8752
8753         sec_def->sec = sec ? strdup(sec) : NULL;
8754         if (sec && !sec_def->sec)
8755                 return libbpf_err(-ENOMEM);
8756
8757         sec_def->prog_type = prog_type;
8758         sec_def->expected_attach_type = exp_attach_type;
8759         sec_def->cookie = OPTS_GET(opts, cookie, 0);
8760
8761         sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
8762         sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
8763         sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
8764
8765         sec_def->handler_id = ++last_custom_sec_def_handler_id;
8766
8767         if (sec)
8768                 custom_sec_def_cnt++;
8769         else
8770                 has_custom_fallback_def = true;
8771
8772         return sec_def->handler_id;
8773 }
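
/* Usage sketch: register a handler for a custom SEC("my_sec") name so such
 * programs open as XDP programs. Section name and types are illustrative;
 * a positive return value is the handler id:
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, opts);
 *	int id;
 *
 *	id = libbpf_register_prog_handler("my_sec", BPF_PROG_TYPE_XDP,
 *					  BPF_XDP, &opts);
 *	if (id > 0)
 *		libbpf_unregister_prog_handler(id);
 */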
8774
8775 int libbpf_unregister_prog_handler(int handler_id)
8776 {
8777         struct bpf_sec_def *sec_defs;
8778         int i;
8779
8780         if (handler_id <= 0)
8781                 return libbpf_err(-EINVAL);
8782
8783         if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
8784                 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
8785                 has_custom_fallback_def = false;
8786                 return 0;
8787         }
8788
8789         for (i = 0; i < custom_sec_def_cnt; i++) {
8790                 if (custom_sec_defs[i].handler_id == handler_id)
8791                         break;
8792         }
8793
8794         if (i == custom_sec_def_cnt)
8795                 return libbpf_err(-ENOENT);
8796
8797         free(custom_sec_defs[i].sec);
8798         for (i = i + 1; i < custom_sec_def_cnt; i++)
8799                 custom_sec_defs[i - 1] = custom_sec_defs[i];
8800         custom_sec_def_cnt--;
8801
8802         /* try to shrink the array, but it's ok if we couldn't */
8803         sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
8804         if (sec_defs)
8805                 custom_sec_defs = sec_defs;
8806
8807         return 0;
8808 }
8809
8810 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
8811 {
8812         size_t len = strlen(sec_def->sec);
8813
8814         /* "type/" always has to have proper SEC("type/extras") form */
8815         if (sec_def->sec[len - 1] == '/') {
8816                 if (str_has_pfx(sec_name, sec_def->sec))
8817                         return true;
8818                 return false;
8819         }
8820
8821         /* "type+" means it can be either exact SEC("type") or
8822          * well-formed SEC("type/extras") with proper '/' separator
8823          */
8824         if (sec_def->sec[len - 1] == '+') {
8825                 len--;
8826                 /* not even a prefix */
8827                 if (strncmp(sec_name, sec_def->sec, len) != 0)
8828                         return false;
8829                 /* exact match or has '/' separator */
8830                 if (sec_name[len] == '\0' || sec_name[len] == '/')
8831                         return true;
8832                 return false;
8833         }
8834
8835         return strcmp(sec_name, sec_def->sec) == 0;
8836 }
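
/* Under these rules, for example, the definition "kprobe+" matches
 * SEC("kprobe") and SEC("kprobe/sys_nanosleep") but not SEC("kprobexyz"),
 * while a plain name like "cgroup/dev" has to match exactly.
 */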
8837
8838 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
8839 {
8840         const struct bpf_sec_def *sec_def;
8841         int i, n;
8842
8843         n = custom_sec_def_cnt;
8844         for (i = 0; i < n; i++) {
8845                 sec_def = &custom_sec_defs[i];
8846                 if (sec_def_matches(sec_def, sec_name))
8847                         return sec_def;
8848         }
8849
8850         n = ARRAY_SIZE(section_defs);
8851         for (i = 0; i < n; i++) {
8852                 sec_def = &section_defs[i];
8853                 if (sec_def_matches(sec_def, sec_name))
8854                         return sec_def;
8855         }
8856
8857         if (has_custom_fallback_def)
8858                 return &custom_fallback_def;
8859
8860         return NULL;
8861 }
8862
8863 #define MAX_TYPE_NAME_SIZE 32
8864
8865 static char *libbpf_get_type_names(bool attach_type)
8866 {
8867         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
8868         char *buf;
8869
8870         buf = malloc(len);
8871         if (!buf)
8872                 return NULL;
8873
8874         buf[0] = '\0';
8875         /* Build the string buf listing all available names */
8876         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
8877                 const struct bpf_sec_def *sec_def = &section_defs[i];
8878
8879                 if (attach_type) {
8880                         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
8881                                 continue;
8882
8883                         if (!(sec_def->cookie & SEC_ATTACHABLE))
8884                                 continue;
8885                 }
8886
8887                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
8888                         free(buf);
8889                         return NULL;
8890                 }
8891                 strcat(buf, " ");
8892                 strcat(buf, section_defs[i].sec);
8893         }
8894
8895         return buf;
8896 }
8897
8898 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
8899                              enum bpf_attach_type *expected_attach_type)
8900 {
8901         const struct bpf_sec_def *sec_def;
8902         char *type_names;
8903
8904         if (!name)
8905                 return libbpf_err(-EINVAL);
8906
8907         sec_def = find_sec_def(name);
8908         if (sec_def) {
8909                 *prog_type = sec_def->prog_type;
8910                 *expected_attach_type = sec_def->expected_attach_type;
8911                 return 0;
8912         }
8913
8914         pr_debug("failed to guess program type from ELF section '%s'\n", name);
8915         type_names = libbpf_get_type_names(false);
8916         if (type_names != NULL) {
8917                 pr_debug("supported section(type) names are:%s\n", type_names);
8918                 free(type_names);
8919         }
8920
8921         return libbpf_err(-ESRCH);
8922 }
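
/* Usage sketch: for SEC("xdp") this reports BPF_PROG_TYPE_XDP with
 * expected attach type BPF_XDP:
 *
 *	enum bpf_prog_type ptype;
 *	enum bpf_attach_type atype;
 *	int err = libbpf_prog_type_by_name("xdp", &ptype, &atype);
 */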
8923
8924 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
8925 {
8926         if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
8927                 return NULL;
8928
8929         return attach_type_name[t];
8930 }
8931
8932 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
8933 {
8934         if (t < 0 || t >= ARRAY_SIZE(link_type_name))
8935                 return NULL;
8936
8937         return link_type_name[t];
8938 }
8939
8940 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
8941 {
8942         if (t < 0 || t >= ARRAY_SIZE(map_type_name))
8943                 return NULL;
8944
8945         return map_type_name[t];
8946 }
8947
8948 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
8949 {
8950         if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
8951                 return NULL;
8952
8953         return prog_type_name[t];
8954 }
8955
8956 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
8957                                                      int sec_idx,
8958                                                      size_t offset)
8959 {
8960         struct bpf_map *map;
8961         size_t i;
8962
8963         for (i = 0; i < obj->nr_maps; i++) {
8964                 map = &obj->maps[i];
8965                 if (!bpf_map__is_struct_ops(map))
8966                         continue;
8967                 if (map->sec_idx == sec_idx &&
8968                     map->sec_offset <= offset &&
8969                     offset - map->sec_offset < map->def.value_size)
8970                         return map;
8971         }
8972
8973         return NULL;
8974 }
8975
8976 /* Collect the reloc from ELF and populate the st_ops->progs[] */
8977 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
8978                                             Elf64_Shdr *shdr, Elf_Data *data)
8979 {
8980         const struct btf_member *member;
8981         struct bpf_struct_ops *st_ops;
8982         struct bpf_program *prog;
8983         unsigned int shdr_idx;
8984         const struct btf *btf;
8985         struct bpf_map *map;
8986         unsigned int moff, insn_idx;
8987         const char *name;
8988         __u32 member_idx;
8989         Elf64_Sym *sym;
8990         Elf64_Rel *rel;
8991         int i, nrels;
8992
8993         btf = obj->btf;
8994         nrels = shdr->sh_size / shdr->sh_entsize;
8995         for (i = 0; i < nrels; i++) {
8996                 rel = elf_rel_by_idx(data, i);
8997                 if (!rel) {
8998                         pr_warn("struct_ops reloc: failed to get %d reloc\n", i);
8999                         return -LIBBPF_ERRNO__FORMAT;
9000                 }
9001
9002                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
9003                 if (!sym) {
9004                         pr_warn("struct_ops reloc: symbol %zx not found\n",
9005                                 (size_t)ELF64_R_SYM(rel->r_info));
9006                         return -LIBBPF_ERRNO__FORMAT;
9007                 }
9008
9009                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
9010                 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
9011                 if (!map) {
9012                         pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
9013                                 (size_t)rel->r_offset);
9014                         return -EINVAL;
9015                 }
9016
9017                 moff = rel->r_offset - map->sec_offset;
9018                 shdr_idx = sym->st_shndx;
9019                 st_ops = map->st_ops;
9020                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d (\'%s\')\n",
9021                          map->name,
9022                          (long long)(rel->r_info >> 32),
9023                          (long long)sym->st_value,
9024                          shdr_idx, (size_t)rel->r_offset,
9025                          map->sec_offset, sym->st_name, name);
9026
9027                 if (shdr_idx >= SHN_LORESERVE) {
9028                         pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
9029                                 map->name, (size_t)rel->r_offset, shdr_idx);
9030                         return -LIBBPF_ERRNO__RELOC;
9031                 }
9032                 if (sym->st_value % BPF_INSN_SZ) {
9033                         pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9034                                 map->name, (unsigned long long)sym->st_value);
9035                         return -LIBBPF_ERRNO__FORMAT;
9036                 }
9037                 insn_idx = sym->st_value / BPF_INSN_SZ;
9038
9039                 member = find_member_by_offset(st_ops->type, moff * 8);
9040                 if (!member) {
9041                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9042                                 map->name, moff);
9043                         return -EINVAL;
9044                 }
9045                 member_idx = member - btf_members(st_ops->type);
9046                 name = btf__name_by_offset(btf, member->name_off);
9047
9048                 if (!resolve_func_ptr(btf, member->type, NULL)) {
9049                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
9050                                 map->name, name);
9051                         return -EINVAL;
9052                 }
9053
9054                 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
9055                 if (!prog) {
9056                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
9057                                 map->name, shdr_idx, name);
9058                         return -EINVAL;
9059                 }
9060
9061                 /* prevent the use of a BPF prog with an invalid type */
9062                 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
9063                         pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
9064                                 map->name, prog->name);
9065                         return -EINVAL;
9066                 }
9067
9068                 /* if we haven't yet processed this BPF program, record proper
9069                  * attach_btf_id and member_idx
9070                  */
9071                 if (!prog->attach_btf_id) {
9072                         prog->attach_btf_id = st_ops->type_id;
9073                         prog->expected_attach_type = member_idx;
9074                 }
9075
9076                 /* struct_ops BPF prog can be re-used between multiple
9077                  * .struct_ops & .struct_ops.link as long as it's the
9078                  * same struct_ops struct definition and the same
9079                  * function pointer field
9080                  */
9081                 if (prog->attach_btf_id != st_ops->type_id ||
9082                     prog->expected_attach_type != member_idx) {
9083                         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
9084                                 map->name, prog->name, prog->sec_name, prog->type,
9085                                 prog->attach_btf_id, prog->expected_attach_type, name);
9086                         return -EINVAL;
9087                 }
9088
9089                 st_ops->progs[member_idx] = prog;
9090         }
9091
9092         return 0;
9093 }
9094
9095 #define BTF_TRACE_PREFIX "btf_trace_"
9096 #define BTF_LSM_PREFIX "bpf_lsm_"
9097 #define BTF_ITER_PREFIX "bpf_iter_"
9098 #define BTF_MAX_NAME_SIZE 128
9099
9100 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
9101                                 const char **prefix, int *kind)
9102 {
9103         switch (attach_type) {
9104         case BPF_TRACE_RAW_TP:
9105                 *prefix = BTF_TRACE_PREFIX;
9106                 *kind = BTF_KIND_TYPEDEF;
9107                 break;
9108         case BPF_LSM_MAC:
9109         case BPF_LSM_CGROUP:
9110                 *prefix = BTF_LSM_PREFIX;
9111                 *kind = BTF_KIND_FUNC;
9112                 break;
9113         case BPF_TRACE_ITER:
9114                 *prefix = BTF_ITER_PREFIX;
9115                 *kind = BTF_KIND_FUNC;
9116                 break;
9117         default:
9118                 *prefix = "";
9119                 *kind = BTF_KIND_FUNC;
9120         }
9121 }
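
/* E.g. for BPF_TRACE_RAW_TP with target name "sched_switch", the helpers
 * below end up looking for a BTF_KIND_TYPEDEF named
 * "btf_trace_sched_switch".
 */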
9122
9123 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
9124                                    const char *name, __u32 kind)
9125 {
9126         char btf_type_name[BTF_MAX_NAME_SIZE];
9127         int ret;
9128
9129         ret = snprintf(btf_type_name, sizeof(btf_type_name),
9130                        "%s%s", prefix, name);
9131         /* snprintf returns the number of characters that would have been
9132          * written, excluding the terminating null. So, a return value >=
9133          * BTF_MAX_NAME_SIZE means the name was truncated.
9134          */
9135         if (ret < 0 || ret >= sizeof(btf_type_name))
9136                 return -ENAMETOOLONG;
9137         return btf__find_by_name_kind(btf, btf_type_name, kind);
9138 }
9139
9140 static inline int find_attach_btf_id(struct btf *btf, const char *name,
9141                                      enum bpf_attach_type attach_type)
9142 {
9143         const char *prefix;
9144         int kind;
9145
9146         btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
9147         return find_btf_by_prefix_kind(btf, prefix, name, kind);
9148 }
9149
9150 int libbpf_find_vmlinux_btf_id(const char *name,
9151                                enum bpf_attach_type attach_type)
9152 {
9153         struct btf *btf;
9154         int err;
9155
9156         btf = btf__load_vmlinux_btf();
9157         err = libbpf_get_error(btf);
9158         if (err) {
9159                 pr_warn("vmlinux BTF is not found\n");
9160                 return libbpf_err(err);
9161         }
9162
9163         err = find_attach_btf_id(btf, name, attach_type);
9164         if (err <= 0)
9165                 pr_warn("%s is not found in vmlinux BTF\n", name);
9166
9167         btf__free(btf);
9168         return libbpf_err(err);
9169 }
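
/* Usage sketch: resolve the vmlinux BTF ID that an fentry program for
 * do_unlinkat() would attach to ("do_unlinkat" is illustrative):
 *
 *	int btf_id = libbpf_find_vmlinux_btf_id("do_unlinkat",
 *						BPF_TRACE_FENTRY);
 */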
9170
9171 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9172 {
9173         struct bpf_prog_info info;
9174         __u32 info_len = sizeof(info);
9175         struct btf *btf;
9176         int err;
9177
9178         memset(&info, 0, info_len);
9179         err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
9180         if (err) {
9181                 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
9182                         attach_prog_fd, err);
9183                 return err;
9184         }
9185
9186         err = -EINVAL;
9187         if (!info.btf_id) {
9188                 pr_warn("The target program doesn't have BTF\n");
9189                 goto out;
9190         }
9191         btf = btf__load_from_kernel_by_id(info.btf_id);
9192         err = libbpf_get_error(btf);
9193         if (err) {
9194                 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
9195                 goto out;
9196         }
9197         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9198         btf__free(btf);
9199         if (err <= 0) {
9200                 pr_warn("%s is not found in prog's BTF\n", name);
9201                 goto out;
9202         }
9203 out:
9204         return err;
9205 }
9206
9207 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9208                               enum bpf_attach_type attach_type,
9209                               int *btf_obj_fd, int *btf_type_id)
9210 {
9211         int ret, i;
9212
9213         ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9214         if (ret > 0) {
9215                 *btf_obj_fd = 0; /* vmlinux BTF */
9216                 *btf_type_id = ret;
9217                 return 0;
9218         }
9219         if (ret != -ENOENT)
9220                 return ret;
9221
9222         ret = load_module_btfs(obj);
9223         if (ret)
9224                 return ret;
9225
9226         for (i = 0; i < obj->btf_module_cnt; i++) {
9227                 const struct module_btf *mod = &obj->btf_modules[i];
9228
9229                 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9230                 if (ret > 0) {
9231                         *btf_obj_fd = mod->fd;
9232                         *btf_type_id = ret;
9233                         return 0;
9234                 }
9235                 if (ret == -ENOENT)
9236                         continue;
9237
9238                 return ret;
9239         }
9240
9241         return -ESRCH;
9242 }
9243
9244 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
9245                                      int *btf_obj_fd, int *btf_type_id)
9246 {
9247         enum bpf_attach_type attach_type = prog->expected_attach_type;
9248         __u32 attach_prog_fd = prog->attach_prog_fd;
9249         int err = 0;
9250
9251         /* BPF program's BTF ID */
9252         if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
9253                 if (!attach_prog_fd) {
9254                         pr_warn("prog '%s': attach program FD is not set\n", prog->name);
9255                         return -EINVAL;
9256                 }
9257                 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9258                 if (err < 0) {
9259                         pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9260                                  prog->name, attach_prog_fd, attach_name, err);
9261                         return err;
9262                 }
9263                 *btf_obj_fd = 0;
9264                 *btf_type_id = err;
9265                 return 0;
9266         }
9267
9268         /* kernel/module BTF ID */
9269         if (prog->obj->gen_loader) {
9270                 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9271                 *btf_obj_fd = 0;
9272                 *btf_type_id = 1;
9273         } else {
9274                 err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
9275         }
9276         if (err) {
9277                 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
9278                         prog->name, attach_name, err);
9279                 return err;
9280         }
9281         return 0;
9282 }
9283
9284 int libbpf_attach_type_by_name(const char *name,
9285                                enum bpf_attach_type *attach_type)
9286 {
9287         char *type_names;
9288         const struct bpf_sec_def *sec_def;
9289
9290         if (!name)
9291                 return libbpf_err(-EINVAL);
9292
9293         sec_def = find_sec_def(name);
9294         if (!sec_def) {
9295                 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9296                 type_names = libbpf_get_type_names(true);
9297                 if (type_names != NULL) {
9298                         pr_debug("attachable section(type) names are:%s\n", type_names);
9299                         free(type_names);
9300                 }
9301
9302                 return libbpf_err(-EINVAL);
9303         }
9304
9305         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9306                 return libbpf_err(-EINVAL);
9307         if (!(sec_def->cookie & SEC_ATTACHABLE))
9308                 return libbpf_err(-EINVAL);
9309
9310         *attach_type = sec_def->expected_attach_type;
9311         return 0;
9312 }
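
/* Usage sketch: for an attachable section name such as "cgroup/connect4"
 * this reports BPF_CGROUP_INET4_CONNECT:
 *
 *	enum bpf_attach_type atype;
 *	int err = libbpf_attach_type_by_name("cgroup/connect4", &atype);
 */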
9313
9314 int bpf_map__fd(const struct bpf_map *map)
9315 {
9316         return map ? map->fd : libbpf_err(-EINVAL);
9317 }
9318
9319 static bool map_uses_real_name(const struct bpf_map *map)
9320 {
9321         /* Since libbpf started to support custom .data.* and .rodata.* maps,
9322          * their user-visible name differs from kernel-visible name. Users see
9323          * such map's corresponding ELF section name as a map name.
9324          * This check distinguishes .data/.rodata from .data.* and .rodata.*
9325          * maps to know which name has to be returned to the user.
9326          */
9327         if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
9328                 return true;
9329         if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
9330                 return true;
9331         return false;
9332 }
9333
9334 const char *bpf_map__name(const struct bpf_map *map)
9335 {
9336         if (!map)
9337                 return NULL;
9338
9339         if (map_uses_real_name(map))
9340                 return map->real_name;
9341
9342         return map->name;
9343 }
9344
9345 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9346 {
9347         return map->def.type;
9348 }
9349
9350 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9351 {
9352         if (map->fd >= 0)
9353                 return libbpf_err(-EBUSY);
9354         map->def.type = type;
9355         return 0;
9356 }
9357
9358 __u32 bpf_map__map_flags(const struct bpf_map *map)
9359 {
9360         return map->def.map_flags;
9361 }
9362
9363 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9364 {
9365         if (map->fd >= 0)
9366                 return libbpf_err(-EBUSY);
9367         map->def.map_flags = flags;
9368         return 0;
9369 }
9370
9371 __u64 bpf_map__map_extra(const struct bpf_map *map)
9372 {
9373         return map->map_extra;
9374 }
9375
9376 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
9377 {
9378         if (map->fd >= 0)
9379                 return libbpf_err(-EBUSY);
9380         map->map_extra = map_extra;
9381         return 0;
9382 }
9383
9384 __u32 bpf_map__numa_node(const struct bpf_map *map)
9385 {
9386         return map->numa_node;
9387 }
9388
9389 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9390 {
9391         if (map->fd >= 0)
9392                 return libbpf_err(-EBUSY);
9393         map->numa_node = numa_node;
9394         return 0;
9395 }
9396
9397 __u32 bpf_map__key_size(const struct bpf_map *map)
9398 {
9399         return map->def.key_size;
9400 }
9401
9402 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9403 {
9404         if (map->fd >= 0)
9405                 return libbpf_err(-EBUSY);
9406         map->def.key_size = size;
9407         return 0;
9408 }
9409
9410 __u32 bpf_map__value_size(const struct bpf_map *map)
9411 {
9412         return map->def.value_size;
9413 }
9414
9415 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9416 {
9417         if (map->fd >= 0)
9418                 return libbpf_err(-EBUSY);
9419         map->def.value_size = size;
9420         return 0;
9421 }
9422
9423 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9424 {
9425         return map ? map->btf_key_type_id : 0;
9426 }
9427
9428 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9429 {
9430         return map ? map->btf_value_type_id : 0;
9431 }
9432
9433 int bpf_map__set_initial_value(struct bpf_map *map,
9434                                const void *data, size_t size)
9435 {
9436         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9437             size != map->def.value_size || map->fd >= 0)
9438                 return libbpf_err(-EINVAL);
9439
9440         memcpy(map->mmaped, data, size);
9441         return 0;
9442 }
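
/* Usage sketch: seed global data before load; size must match the map's
 * value size exactly, so pass the whole section struct (the struct and
 * field names are illustrative):
 *
 *	struct my_globals vals = { .verbose = true };
 *
 *	bpf_map__set_initial_value(data_map, &vals, sizeof(vals));
 */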
9443
9444 const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
9445 {
9446         if (!map->mmaped)
9447                 return NULL;
9448         *psize = map->def.value_size;
9449         return map->mmaped;
9450 }
9451
9452 bool bpf_map__is_internal(const struct bpf_map *map)
9453 {
9454         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
9455 }
9456
9457 __u32 bpf_map__ifindex(const struct bpf_map *map)
9458 {
9459         return map->map_ifindex;
9460 }
9461
9462 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
9463 {
9464         if (map->fd >= 0)
9465                 return libbpf_err(-EBUSY);
9466         map->map_ifindex = ifindex;
9467         return 0;
9468 }
9469
9470 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
9471 {
9472         if (!bpf_map_type__is_map_in_map(map->def.type)) {
9473                 pr_warn("error: unsupported map type\n");
9474                 return libbpf_err(-EINVAL);
9475         }
9476         if (map->inner_map_fd != -1) {
9477                 pr_warn("error: inner_map_fd already specified\n");
9478                 return libbpf_err(-EINVAL);
9479         }
9480         if (map->inner_map) {
9481                 bpf_map__destroy(map->inner_map);
9482                 zfree(&map->inner_map);
9483         }
9484         map->inner_map_fd = fd;
9485         return 0;
9486 }
9487
9488 static struct bpf_map *
9489 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
9490 {
9491         ssize_t idx;
9492         struct bpf_map *s, *e;
9493
9494         if (!obj || !obj->maps)
9495                 return errno = EINVAL, NULL;
9496
9497         s = obj->maps;
9498         e = obj->maps + obj->nr_maps;
9499
9500         if ((m < s) || (m >= e)) {
9501                 pr_warn("error in %s: map handler doesn't belong to object\n",
9502                          __func__);
9503                 return errno = EINVAL, NULL;
9504         }
9505
9506         idx = (m - obj->maps) + i;
9507         if (idx >= obj->nr_maps || idx < 0)
9508                 return NULL;
9509         return &obj->maps[idx];
9510 }
9511
9512 struct bpf_map *
9513 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
9514 {
9515         if (prev == NULL)
9516                 return obj->maps;
9517
9518         return __bpf_map__iter(prev, obj, 1);
9519 }
9520
9521 struct bpf_map *
9522 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
9523 {
9524         if (next == NULL) {
9525                 if (!obj->nr_maps)
9526                         return NULL;
9527                 return obj->maps + obj->nr_maps - 1;
9528         }
9529
9530         return __bpf_map__iter(next, obj, -1);
9531 }
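
/* Usage sketch (hypothetical object variable): bpf_object__next_map() and
 * bpf_object__prev_map() provide forward/backward iteration over an object's
 * maps; the bpf_object__for_each_map() helper is built on the former:
 *
 *	struct bpf_map *map;
 *
 *	for (map = bpf_object__next_map(obj, NULL); map;
 *	     map = bpf_object__next_map(obj, map))
 *		printf("map: %s\n", bpf_map__name(map));
 */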
9532
9533 struct bpf_map *
9534 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
9535 {
9536         struct bpf_map *pos;
9537
9538         bpf_object__for_each_map(pos, obj) {
9539                 /* if it's a special internal map name (which always starts
9540                  * with dot) then check if that special name matches the
9541                  * real map name (ELF section name)
9542                  */
9543                 if (name[0] == '.') {
9544                         if (pos->real_name && strcmp(pos->real_name, name) == 0)
9545                                 return pos;
9546                         continue;
9547                 }
9548                 /* otherwise map name has to be an exact match */
9549                 if (map_uses_real_name(pos)) {
9550                         if (strcmp(pos->real_name, name) == 0)
9551                                 return pos;
9552                         continue;
9553                 }
9554                 if (strcmp(pos->name, name) == 0)
9555                         return pos;
9556         }
9557         return errno = ENOENT, NULL;
9558 }
9559
9560 int
9561 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
9562 {
9563         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
9564 }
9565
9566 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
9567                            size_t value_sz, bool check_value_sz)
9568 {
9569         if (map->fd <= 0)
9570                 return -ENOENT;
9571
9572         if (map->def.key_size != key_sz) {
9573                 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
9574                         map->name, key_sz, map->def.key_size);
9575                 return -EINVAL;
9576         }
9577
9578         if (!check_value_sz)
9579                 return 0;
9580
9581         switch (map->def.type) {
9582         case BPF_MAP_TYPE_PERCPU_ARRAY:
9583         case BPF_MAP_TYPE_PERCPU_HASH:
9584         case BPF_MAP_TYPE_LRU_PERCPU_HASH:
9585         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
9586                 int num_cpu = libbpf_num_possible_cpus();
9587                 size_t elem_sz = roundup(map->def.value_size, 8);
9588
9589                 if (value_sz != num_cpu * elem_sz) {
9590                         pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
9591                                 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
9592                         return -EINVAL;
9593                 }
9594                 break;
9595         }
9596         default:
9597                 if (map->def.value_size != value_sz) {
9598                         pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
9599                                 map->name, value_sz, map->def.value_size);
9600                         return -EINVAL;
9601                 }
9602                 break;
9603         }
9604         return 0;
9605 }
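
/* Worked example of the per-CPU sizing rule above: for a
 * BPF_MAP_TYPE_PERCPU_ARRAY with value_size == 4 on a system with 8 possible
 * CPUs, each per-CPU element occupies roundup(4, 8) == 8 bytes, so the
 * caller must supply 8 * 8 == 64 bytes:
 *
 *	__u64 vals[8];
 *	__u32 key = 0;
 *
 *	bpf_map__lookup_elem(map, &key, sizeof(key), vals, sizeof(vals), 0);
 */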
9606
9607 int bpf_map__lookup_elem(const struct bpf_map *map,
9608                          const void *key, size_t key_sz,
9609                          void *value, size_t value_sz, __u64 flags)
9610 {
9611         int err;
9612
9613         err = validate_map_op(map, key_sz, value_sz, true);
9614         if (err)
9615                 return libbpf_err(err);
9616
9617         return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
9618 }
9619
9620 int bpf_map__update_elem(const struct bpf_map *map,
9621                          const void *key, size_t key_sz,
9622                          const void *value, size_t value_sz, __u64 flags)
9623 {
9624         int err;
9625
9626         err = validate_map_op(map, key_sz, value_sz, true);
9627         if (err)
9628                 return libbpf_err(err);
9629
9630         return bpf_map_update_elem(map->fd, key, value, flags);
9631 }
9632
9633 int bpf_map__delete_elem(const struct bpf_map *map,
9634                          const void *key, size_t key_sz, __u64 flags)
9635 {
9636         int err;
9637
9638         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9639         if (err)
9640                 return libbpf_err(err);
9641
9642         return bpf_map_delete_elem_flags(map->fd, key, flags);
9643 }
9644
9645 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
9646                                     const void *key, size_t key_sz,
9647                                     void *value, size_t value_sz, __u64 flags)
9648 {
9649         int err;
9650
9651         err = validate_map_op(map, key_sz, value_sz, true);
9652         if (err)
9653                 return libbpf_err(err);
9654
9655         return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
9656 }
9657
9658 int bpf_map__get_next_key(const struct bpf_map *map,
9659                           const void *cur_key, void *next_key, size_t key_sz)
9660 {
9661         int err;
9662
9663         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
9664         if (err)
9665                 return libbpf_err(err);
9666
9667         return bpf_map_get_next_key(map->fd, cur_key, next_key);
9668 }
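
/* Usage sketch (hypothetical key/value types): the size-checked wrappers
 * above catch key/value size mismatches at the libbpf level instead of
 * surfacing as an opaque -EINVAL from the kernel:
 *
 *	__u32 key = 0;
 *	__u64 value;
 *
 *	if (!bpf_map__lookup_elem(map, &key, sizeof(key),
 *				  &value, sizeof(value), 0))
 *		value++;
 *	bpf_map__update_elem(map, &key, sizeof(key),
 *			     &value, sizeof(value), BPF_ANY);
 */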
9669
9670 long libbpf_get_error(const void *ptr)
9671 {
9672         if (!IS_ERR_OR_NULL(ptr))
9673                 return 0;
9674
9675         if (IS_ERR(ptr))
9676                 errno = -PTR_ERR(ptr);
9677
9678         /* If ptr == NULL, then errno should already be set by the failing
9679          * API, because libbpf never returns NULL on success and it now
9680          * always sets errno on error. So no extra errno handling is needed
9681          * for the ptr == NULL case.
9682          */
9683         return -errno;
9684 }
9685
9686 /* Replace link's underlying BPF program with the new one */
9687 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
9688 {
9689         int ret;
9690
9691         ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
9692         return libbpf_err_errno(ret);
9693 }
9694
9695 /* Release "ownership" of the underlying BPF resource (typically, a BPF
9696  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
9697  * disconnected link, when destroyed through a bpf_link__destroy() call, won't
9698  * attempt to detach/unregister that BPF resource. This is useful in
9699  * situations where, say, the attached BPF program has to outlive the
9700  * userspace program that attached it. Depending on the type of BPF program,
9701  * though, there might be additional steps (like pinning the BPF program in
9702  * BPF FS) necessary to ensure that exit of the userspace program doesn't
9703  * trigger automatic detachment and cleanup inside the kernel.
9704  */
9705 void bpf_link__disconnect(struct bpf_link *link)
9706 {
9707         link->disconnected = true;
9708 }
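
/* Usage sketch (pin path is hypothetical): the typical pattern for keeping a
 * BPF program attached past the lifetime of the attaching process is to pin
 * the link and then disconnect it, so that bpf_link__destroy() only frees
 * the userspace object without detaching anything:
 *
 *	bpf_link__pin(link, "/sys/fs/bpf/my_link");
 *	bpf_link__disconnect(link);
 *	bpf_link__destroy(link);
 */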
9709
9710 int bpf_link__destroy(struct bpf_link *link)
9711 {
9712         int err = 0;
9713
9714         if (IS_ERR_OR_NULL(link))
9715                 return 0;
9716
9717         if (!link->disconnected && link->detach)
9718                 err = link->detach(link);
9719         if (link->pin_path)
9720                 free(link->pin_path);
9721         if (link->dealloc)
9722                 link->dealloc(link);
9723         else
9724                 free(link);
9725
9726         return libbpf_err(err);
9727 }
9728
9729 int bpf_link__fd(const struct bpf_link *link)
9730 {
9731         return link->fd;
9732 }
9733
9734 const char *bpf_link__pin_path(const struct bpf_link *link)
9735 {
9736         return link->pin_path;
9737 }
9738
9739 static int bpf_link__detach_fd(struct bpf_link *link)
9740 {
9741         return libbpf_err_errno(close(link->fd));
9742 }
9743
9744 struct bpf_link *bpf_link__open(const char *path)
9745 {
9746         struct bpf_link *link;
9747         int fd;
9748
9749         fd = bpf_obj_get(path);
9750         if (fd < 0) {
9751                 fd = -errno;
9752                 pr_warn("failed to open link at %s: %d\n", path, fd);
9753                 return libbpf_err_ptr(fd);
9754         }
9755
9756         link = calloc(1, sizeof(*link));
9757         if (!link) {
9758                 close(fd);
9759                 return libbpf_err_ptr(-ENOMEM);
9760         }
9761         link->detach = &bpf_link__detach_fd;
9762         link->fd = fd;
9763
9764         link->pin_path = strdup(path);
9765         if (!link->pin_path) {
9766                 bpf_link__destroy(link);
9767                 return libbpf_err_ptr(-ENOMEM);
9768         }
9769
9770         return link;
9771 }
9772
9773 int bpf_link__detach(struct bpf_link *link)
9774 {
9775         return bpf_link_detach(link->fd) ? -errno : 0;
9776 }
9777
9778 int bpf_link__pin(struct bpf_link *link, const char *path)
9779 {
9780         int err;
9781
9782         if (link->pin_path)
9783                 return libbpf_err(-EBUSY);
9784         err = make_parent_dir(path);
9785         if (err)
9786                 return libbpf_err(err);
9787         err = check_path(path);
9788         if (err)
9789                 return libbpf_err(err);
9790
9791         link->pin_path = strdup(path);
9792         if (!link->pin_path)
9793                 return libbpf_err(-ENOMEM);
9794
9795         if (bpf_obj_pin(link->fd, link->pin_path)) {
9796                 err = -errno;
9797                 zfree(&link->pin_path);
9798                 return libbpf_err(err);
9799         }
9800
9801         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
9802         return 0;
9803 }
9804
9805 int bpf_link__unpin(struct bpf_link *link)
9806 {
9807         int err;
9808
9809         if (!link->pin_path)
9810                 return libbpf_err(-EINVAL);
9811
9812         err = unlink(link->pin_path);
9813         if (err != 0)
9814                 return -errno;
9815
9816         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
9817         zfree(&link->pin_path);
9818         return 0;
9819 }
9820
9821 struct bpf_link_perf {
9822         struct bpf_link link;
9823         int perf_event_fd;
9824         /* legacy kprobe support: keep track of probe identifier and type */
9825         char *legacy_probe_name;
9826         bool legacy_is_kprobe;
9827         bool legacy_is_retprobe;
9828 };
9829
9830 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
9831 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
9832
9833 static int bpf_link_perf_detach(struct bpf_link *link)
9834 {
9835         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9836         int err = 0;
9837
9838         if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
9839                 err = -errno;
9840
9841         if (perf_link->perf_event_fd != link->fd)
9842                 close(perf_link->perf_event_fd);
9843         close(link->fd);
9844
9845         /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
9846         if (perf_link->legacy_probe_name) {
9847                 if (perf_link->legacy_is_kprobe) {
9848                         err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
9849                                                          perf_link->legacy_is_retprobe);
9850                 } else {
9851                         err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
9852                                                          perf_link->legacy_is_retprobe);
9853                 }
9854         }
9855
9856         return err;
9857 }
9858
9859 static void bpf_link_perf_dealloc(struct bpf_link *link)
9860 {
9861         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
9862
9863         free(perf_link->legacy_probe_name);
9864         free(perf_link);
9865 }
9866
9867 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
9868                                                      const struct bpf_perf_event_opts *opts)
9869 {
9870         char errmsg[STRERR_BUFSIZE];
9871         struct bpf_link_perf *link;
9872         int prog_fd, link_fd = -1, err;
9873         bool force_ioctl_attach;
9874
9875         if (!OPTS_VALID(opts, bpf_perf_event_opts))
9876                 return libbpf_err_ptr(-EINVAL);
9877
9878         if (pfd < 0) {
9879                 pr_warn("prog '%s': invalid perf event FD %d\n",
9880                         prog->name, pfd);
9881                 return libbpf_err_ptr(-EINVAL);
9882         }
9883         prog_fd = bpf_program__fd(prog);
9884         if (prog_fd < 0) {
9885                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
9886                         prog->name);
9887                 return libbpf_err_ptr(-EINVAL);
9888         }
9889
9890         link = calloc(1, sizeof(*link));
9891         if (!link)
9892                 return libbpf_err_ptr(-ENOMEM);
9893         link->link.detach = &bpf_link_perf_detach;
9894         link->link.dealloc = &bpf_link_perf_dealloc;
9895         link->perf_event_fd = pfd;
9896
9897         force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
9898         if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
9899                 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
9900                         .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
9901
9902                 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
9903                 if (link_fd < 0) {
9904                         err = -errno;
9905                         pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
9906                                 prog->name, pfd,
9907                                 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9908                         goto err_out;
9909                 }
9910                 link->link.fd = link_fd;
9911         } else {
9912                 if (OPTS_GET(opts, bpf_cookie, 0)) {
9913                         pr_warn("prog '%s': user context value is not supported\n", prog->name);
9914                         err = -EOPNOTSUPP;
9915                         goto err_out;
9916                 }
9917
9918                 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
9919                         err = -errno;
9920                         pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
9921                                 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9922                         if (err == -EPROTO)
9923                                 pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n",
9924                                         prog->name, pfd);
9925                         goto err_out;
9926                 }
9927                 link->link.fd = pfd;
9928         }
9929         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
9930                 err = -errno;
9931                 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
9932                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
9933                 goto err_out;
9934         }
9935
9936         return &link->link;
9937 err_out:
9938         if (link_fd >= 0)
9939                 close(link_fd);
9940         free(link);
9941         return libbpf_err_ptr(err);
9942 }
9943
9944 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
9945 {
9946         return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
9947 }
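
/* Usage sketch (not part of libbpf; prog is assumed to be a loaded
 * SEC("perf_event") program): open a CPU-cycles sampling event on CPU 0
 * (pid = -1, cpu = 0, group_fd = -1) and attach the program to it:
 *
 *	struct perf_event_attr attr = {
 *		.type = PERF_TYPE_HARDWARE,
 *		.size = sizeof(attr),
 *		.config = PERF_COUNT_HW_CPU_CYCLES,
 *		.freq = 1,
 *		.sample_freq = 99,
 *	};
 *	int pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1,
 *			  PERF_FLAG_FD_CLOEXEC);
 *	struct bpf_link *link = bpf_program__attach_perf_event(prog, pfd);
 *
 *	if (libbpf_get_error(link))
 *		close(pfd);
 */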
9948
9949 /*
9950  * This function is expected to parse an integer in the range of [0, 2^31-1]
9951  * from the given file using scanf format string fmt. If the actual parsed
9952  * value is negative, the result might be indistinguishable from an error.
9953  */
9954 static int parse_uint_from_file(const char *file, const char *fmt)
9955 {
9956         char buf[STRERR_BUFSIZE];
9957         int err, ret;
9958         FILE *f;
9959
9960         f = fopen(file, "r");
9961         if (!f) {
9962                 err = -errno;
9963                 pr_debug("failed to open '%s': %s\n", file,
9964                          libbpf_strerror_r(err, buf, sizeof(buf)));
9965                 return err;
9966         }
9967         err = fscanf(f, fmt, &ret);
9968         if (err != 1) {
9969                 err = err == EOF ? -EIO : -errno;
9970                 pr_debug("failed to parse '%s': %s\n", file,
9971                         libbpf_strerror_r(err, buf, sizeof(buf)));
9972                 fclose(f);
9973                 return err;
9974         }
9975         fclose(f);
9976         return ret;
9977 }
9978
9979 static int determine_kprobe_perf_type(void)
9980 {
9981         const char *file = "/sys/bus/event_source/devices/kprobe/type";
9982
9983         return parse_uint_from_file(file, "%d\n");
9984 }
9985
9986 static int determine_uprobe_perf_type(void)
9987 {
9988         const char *file = "/sys/bus/event_source/devices/uprobe/type";
9989
9990         return parse_uint_from_file(file, "%d\n");
9991 }
9992
9993 static int determine_kprobe_retprobe_bit(void)
9994 {
9995         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
9996
9997         return parse_uint_from_file(file, "config:%d\n");
9998 }
9999
10000 static int determine_uprobe_retprobe_bit(void)
10001 {
10002         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
10003
10004         return parse_uint_from_file(file, "config:%d\n");
10005 }
10006
10007 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
10008 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
10009
10010 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
10011                                  uint64_t offset, int pid, size_t ref_ctr_off)
10012 {
10013         const size_t attr_sz = sizeof(struct perf_event_attr);
10014         struct perf_event_attr attr;
10015         char errmsg[STRERR_BUFSIZE];
10016         int type, pfd;
10017
10018         if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
10019                 return -EINVAL;
10020
10021         memset(&attr, 0, attr_sz);
10022
10023         type = uprobe ? determine_uprobe_perf_type()
10024                       : determine_kprobe_perf_type();
10025         if (type < 0) {
10026                 pr_warn("failed to determine %s perf type: %s\n",
10027                         uprobe ? "uprobe" : "kprobe",
10028                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
10029                 return type;
10030         }
10031         if (retprobe) {
10032                 int bit = uprobe ? determine_uprobe_retprobe_bit()
10033                                  : determine_kprobe_retprobe_bit();
10034
10035                 if (bit < 0) {
10036                         pr_warn("failed to determine %s retprobe bit: %s\n",
10037                                 uprobe ? "uprobe" : "kprobe",
10038                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
10039                         return bit;
10040                 }
10041                 attr.config |= 1 << bit;
10042         }
10043         attr.size = attr_sz;
10044         attr.type = type;
10045         attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
10046         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
10047         attr.config2 = offset;           /* kprobe_addr or probe_offset */
10048
10049         /* pid filter is meaningful only for uprobes */
10050         pfd = syscall(__NR_perf_event_open, &attr,
10051                       pid < 0 ? -1 : pid /* pid */,
10052                       pid == -1 ? 0 : -1 /* cpu */,
10053                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10054         return pfd >= 0 ? pfd : -errno;
10055 }
10056
10057 static int append_to_file(const char *file, const char *fmt, ...)
10058 {
10059         int fd, n, err = 0;
10060         va_list ap;
10061         char buf[1024];
10062
10063         va_start(ap, fmt);
10064         n = vsnprintf(buf, sizeof(buf), fmt, ap);
10065         va_end(ap);
10066
10067         if (n < 0 || n >= sizeof(buf))
10068                 return -EINVAL;
10069
10070         fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
10071         if (fd < 0)
10072                 return -errno;
10073
10074         if (write(fd, buf, n) < 0)
10075                 err = -errno;
10076
10077         close(fd);
10078         return err;
10079 }
10080
10081 #define DEBUGFS "/sys/kernel/debug/tracing"
10082 #define TRACEFS "/sys/kernel/tracing"
10083
10084 static bool use_debugfs(void)
10085 {
10086         static int has_debugfs = -1;
10087
10088         if (has_debugfs < 0)
10089                 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
10090
10091         return has_debugfs == 1;
10092 }
10093
10094 static const char *tracefs_path(void)
10095 {
10096         return use_debugfs() ? DEBUGFS : TRACEFS;
10097 }
10098
10099 static const char *tracefs_kprobe_events(void)
10100 {
10101         return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
10102 }
10103
10104 static const char *tracefs_uprobe_events(void)
10105 {
10106         return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
10107 }
10108
10109 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
10110                                          const char *kfunc_name, size_t offset)
10111 {
10112         static int index = 0;
10113         int i;
10114
10115         snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
10116                  __sync_fetch_and_add(&index, 1));
10117
10118         /* sanitize kfunc_name in the generated probe name */
10119         for (i = 0; buf[i]; i++) {
10120                 if (!isalnum(buf[i]))
10121                         buf[i] = '_';
10122         }
10123 }
10124
10125 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
10126                                    const char *kfunc_name, size_t offset)
10127 {
10128         return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
10129                               retprobe ? 'r' : 'p',
10130                               retprobe ? "kretprobes" : "kprobes",
10131                               probe_name, kfunc_name, offset);
10132 }
10133
10134 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
10135 {
10136         return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
10137                               retprobe ? "kretprobes" : "kprobes", probe_name);
10138 }
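
/* For illustration (names and offsets hypothetical), with probe_name
 * "libbpf_1234_do_unlinkat_0x0_7" as generated above and retprobe == false,
 * add_kprobe_event_legacy() appends a line like
 *
 *	p:kprobes/libbpf_1234_do_unlinkat_0x0_7 do_unlinkat+0x0
 *
 * to <tracefs>/kprobe_events, and remove_kprobe_event_legacy() later appends
 *
 *	-:kprobes/libbpf_1234_do_unlinkat_0x0_7
 *
 * to delete it again.
 */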
10139
10140 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
10141 {
10142         char file[256];
10143
10144         snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10145                  tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
10146
10147         return parse_uint_from_file(file, "%d\n");
10148 }
10149
10150 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
10151                                          const char *kfunc_name, size_t offset, int pid)
10152 {
10153         const size_t attr_sz = sizeof(struct perf_event_attr);
10154         struct perf_event_attr attr;
10155         char errmsg[STRERR_BUFSIZE];
10156         int type, pfd, err;
10157
10158         err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
10159         if (err < 0) {
10160                 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
10161                         kfunc_name, offset,
10162                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10163                 return err;
10164         }
10165         type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
10166         if (type < 0) {
10167                 err = type;
10168                 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
10169                         kfunc_name, offset,
10170                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10171                 goto err_clean_legacy;
10172         }
10173
10174         memset(&attr, 0, attr_sz);
10175         attr.size = attr_sz;
10176         attr.config = type;
10177         attr.type = PERF_TYPE_TRACEPOINT;
10178
10179         pfd = syscall(__NR_perf_event_open, &attr,
10180                       pid < 0 ? -1 : pid, /* pid */
10181                       pid == -1 ? 0 : -1, /* cpu */
10182                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10183         if (pfd < 0) {
10184                 err = -errno;
10185                 pr_warn("legacy kprobe perf_event_open() failed: %s\n",
10186                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10187                 goto err_clean_legacy;
10188         }
10189         return pfd;
10190
10191 err_clean_legacy:
10192         /* Clear the newly added legacy kprobe_event */
10193         remove_kprobe_event_legacy(probe_name, retprobe);
10194         return err;
10195 }
10196
10197 static const char *arch_specific_syscall_pfx(void)
10198 {
10199 #if defined(__x86_64__)
10200         return "x64";
10201 #elif defined(__i386__)
10202         return "ia32";
10203 #elif defined(__s390x__)
10204         return "s390x";
10205 #elif defined(__s390__)
10206         return "s390";
10207 #elif defined(__arm__)
10208         return "arm";
10209 #elif defined(__aarch64__)
10210         return "arm64";
10211 #elif defined(__mips__)
10212         return "mips";
10213 #elif defined(__riscv)
10214         return "riscv";
10215 #elif defined(__powerpc64__) /* check before __powerpc__, which it implies */
10216         return "powerpc64";
10217 #elif defined(__powerpc__)
10218         return "powerpc";
10219 #else
10220         return NULL;
10221 #endif
10222 }
10223
10224 static int probe_kern_syscall_wrapper(void)
10225 {
10226         char syscall_name[64];
10227         const char *ksys_pfx;
10228
10229         ksys_pfx = arch_specific_syscall_pfx();
10230         if (!ksys_pfx)
10231                 return 0;
10232
10233         snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
10234
10235         if (determine_kprobe_perf_type() >= 0) {
10236                 int pfd;
10237
10238                 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
10239                 if (pfd >= 0)
10240                         close(pfd);
10241
10242                 return pfd >= 0 ? 1 : 0;
10243         } else { /* legacy mode */
10244                 char probe_name[128];
10245
10246                 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
10247                 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
10248                         return 0;
10249
10250                 (void)remove_kprobe_event_legacy(probe_name, false);
10251                 return 1;
10252         }
10253 }
10254
10255 struct bpf_link *
10256 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
10257                                 const char *func_name,
10258                                 const struct bpf_kprobe_opts *opts)
10259 {
10260         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10261         enum probe_attach_mode attach_mode;
10262         char errmsg[STRERR_BUFSIZE];
10263         char *legacy_probe = NULL;
10264         struct bpf_link *link;
10265         size_t offset;
10266         bool retprobe, legacy;
10267         int pfd, err;
10268
10269         if (!OPTS_VALID(opts, bpf_kprobe_opts))
10270                 return libbpf_err_ptr(-EINVAL);
10271
10272         attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
10273         retprobe = OPTS_GET(opts, retprobe, false);
10274         offset = OPTS_GET(opts, offset, 0);
10275         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10276
10277         legacy = determine_kprobe_perf_type() < 0;
10278         switch (attach_mode) {
10279         case PROBE_ATTACH_MODE_LEGACY:
10280                 legacy = true;
10281                 pe_opts.force_ioctl_attach = true;
10282                 break;
10283         case PROBE_ATTACH_MODE_PERF:
10284                 if (legacy)
10285                         return libbpf_err_ptr(-ENOTSUP);
10286                 pe_opts.force_ioctl_attach = true;
10287                 break;
10288         case PROBE_ATTACH_MODE_LINK:
10289                 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
10290                         return libbpf_err_ptr(-ENOTSUP);
10291                 break;
10292         case PROBE_ATTACH_MODE_DEFAULT:
10293                 break;
10294         default:
10295                 return libbpf_err_ptr(-EINVAL);
10296         }
10297
10298         if (!legacy) {
10299                 pfd = perf_event_open_probe(false /* uprobe */, retprobe,
10300                                             func_name, offset,
10301                                             -1 /* pid */, 0 /* ref_ctr_off */);
10302         } else {
10303                 char probe_name[256];
10304
10305                 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
10306                                              func_name, offset);
10307
10308                 legacy_probe = strdup(probe_name);
10309                 if (!legacy_probe)
10310                         return libbpf_err_ptr(-ENOMEM);
10311
10312                 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
10313                                                     offset, -1 /* pid */);
10314         }
10315         if (pfd < 0) {
10316                 err = -errno;
10317                 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
10318                         prog->name, retprobe ? "kretprobe" : "kprobe",
10319                         func_name, offset,
10320                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10321                 goto err_out;
10322         }
10323         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10324         err = libbpf_get_error(link);
10325         if (err) {
10326                 close(pfd);
10327                 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
10328                         prog->name, retprobe ? "kretprobe" : "kprobe",
10329                         func_name, offset,
10330                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10331                 goto err_clean_legacy;
10332         }
10333         if (legacy) {
10334                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10335
10336                 perf_link->legacy_probe_name = legacy_probe;
10337                 perf_link->legacy_is_kprobe = true;
10338                 perf_link->legacy_is_retprobe = retprobe;
10339         }
10340
10341         return link;
10342
10343 err_clean_legacy:
10344         if (legacy)
10345                 remove_kprobe_event_legacy(legacy_probe, retprobe);
10346 err_out:
10347         free(legacy_probe);
10348         return libbpf_err_ptr(err);
10349 }
10350
10351 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
10352                                             bool retprobe,
10353                                             const char *func_name)
10354 {
10355         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
10356                 .retprobe = retprobe,
10357         );
10358
10359         return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10360 }
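
/* Usage sketch (program and kernel function names are hypothetical): attach
 * a kretprobe on do_unlinkat; prog must belong to an already loaded object:
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe(prog, true, "do_unlinkat");
 *	if (libbpf_get_error(link))
 *		return -errno;
 */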
10361
10362 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
10363                                               const char *syscall_name,
10364                                               const struct bpf_ksyscall_opts *opts)
10365 {
10366         LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
10367         char func_name[128];
10368
10369         if (!OPTS_VALID(opts, bpf_ksyscall_opts))
10370                 return libbpf_err_ptr(-EINVAL);
10371
10372         if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
10373                 /* arch_specific_syscall_pfx() should never return NULL here
10374                  * because it is guarded by kernel_supports(). However, since
10375                  * the compiler does not know that, we keep an explicit fallback
10376                  * conditional as well.
10377                  */
10378                 snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
10379                          arch_specific_syscall_pfx() ? : "", syscall_name);
10380         } else {
10381                 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
10382         }
10383
10384         kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
10385         kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10386
10387         return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
10388 }
10389
10390 /* Adapted from perf/util/string.c */
10391 static bool glob_match(const char *str, const char *pat)
10392 {
10393         while (*str && *pat && *pat != '*') {
10394                 if (*pat == '?') {      /* Matches any single character */
10395                         str++;
10396                         pat++;
10397                         continue;
10398                 }
10399                 if (*str != *pat)
10400                         return false;
10401                 str++;
10402                 pat++;
10403         }
10404         /* Check wild card */
10405         if (*pat == '*') {
10406                 while (*pat == '*')
10407                         pat++;
10408                 if (!*pat) /* Tail wild card matches all */
10409                         return true;
10410                 while (*str)
10411                         if (glob_match(str++, pat))
10412                                 return true;
10413         }
10414         return !*str && !*pat;
10415 }
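
/* A few illustrative matches for the globbing above ('*' matches any
 * sequence of characters, '?' any single character):
 *
 *	glob_match("tcp_v4_connect", "tcp_*")      -> true
 *	glob_match("tcp_v4_connect", "tcp_v?_*")   -> true
 *	glob_match("udp_sendmsg", "tcp_*")         -> false
 */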
10416
10417 struct kprobe_multi_resolve {
10418         const char *pattern;
10419         unsigned long *addrs;
10420         size_t cap;
10421         size_t cnt;
10422 };
10423
10424 static int
10425 resolve_kprobe_multi_cb(unsigned long long sym_addr, char sym_type,
10426                         const char *sym_name, void *ctx)
10427 {
10428         struct kprobe_multi_resolve *res = ctx;
10429         int err;
10430
10431         if (!glob_match(sym_name, res->pattern))
10432                 return 0;
10433
10434         err = libbpf_ensure_mem((void **) &res->addrs, &res->cap, sizeof(unsigned long),
10435                                 res->cnt + 1);
10436         if (err)
10437                 return err;
10438
10439         res->addrs[res->cnt++] = (unsigned long) sym_addr;
10440         return 0;
10441 }
10442
10443 struct bpf_link *
10444 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
10445                                       const char *pattern,
10446                                       const struct bpf_kprobe_multi_opts *opts)
10447 {
10448         LIBBPF_OPTS(bpf_link_create_opts, lopts);
10449         struct kprobe_multi_resolve res = {
10450                 .pattern = pattern,
10451         };
10452         struct bpf_link *link = NULL;
10453         char errmsg[STRERR_BUFSIZE];
10454         const unsigned long *addrs;
10455         int err, link_fd, prog_fd;
10456         const __u64 *cookies;
10457         const char **syms;
10458         bool retprobe;
10459         size_t cnt;
10460
10461         if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
10462                 return libbpf_err_ptr(-EINVAL);
10463
10464         syms    = OPTS_GET(opts, syms, false);
10465         addrs   = OPTS_GET(opts, addrs, false);
10466         cnt     = OPTS_GET(opts, cnt, false);
10467         cookies = OPTS_GET(opts, cookies, false);
10468
10469         if (!pattern && !addrs && !syms)
10470                 return libbpf_err_ptr(-EINVAL);
10471         if (pattern && (addrs || syms || cookies || cnt))
10472                 return libbpf_err_ptr(-EINVAL);
10473         if (!pattern && !cnt)
10474                 return libbpf_err_ptr(-EINVAL);
10475         if (addrs && syms)
10476                 return libbpf_err_ptr(-EINVAL);
10477
10478         if (pattern) {
10479                 err = libbpf_kallsyms_parse(resolve_kprobe_multi_cb, &res);
10480                 if (err)
10481                         goto error;
10482                 if (!res.cnt) {
10483                         err = -ENOENT;
10484                         goto error;
10485                 }
10486                 addrs = res.addrs;
10487                 cnt = res.cnt;
10488         }
10489
10490         retprobe = OPTS_GET(opts, retprobe, false);
10491
10492         lopts.kprobe_multi.syms = syms;
10493         lopts.kprobe_multi.addrs = addrs;
10494         lopts.kprobe_multi.cookies = cookies;
10495         lopts.kprobe_multi.cnt = cnt;
10496         lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
10497
10498         link = calloc(1, sizeof(*link));
10499         if (!link) {
10500                 err = -ENOMEM;
10501                 goto error;
10502         }
10503         link->detach = &bpf_link__detach_fd;
10504
10505         prog_fd = bpf_program__fd(prog);
10506         link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
10507         if (link_fd < 0) {
10508                 err = -errno;
10509                 pr_warn("prog '%s': failed to attach: %s\n",
10510                         prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10511                 goto error;
10512         }
10513         link->fd = link_fd;
10514         free(res.addrs);
10515         return link;
10516
10517 error:
10518         free(link);
10519         free(res.addrs);
10520         return libbpf_err_ptr(err);
10521 }
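
/* Usage sketch (hypothetical pattern): attach one multi-kprobe link to every
 * kallsyms function matching a glob; pattern, syms and addrs are mutually
 * exclusive, as validated above:
 *
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, .retprobe = false);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", &opts);
 *	if (libbpf_get_error(link))
 *		return -errno;
 */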
10522
10523 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10524 {
10525         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
10526         unsigned long offset = 0;
10527         const char *func_name;
10528         char *func;
10529         int n;
10530
10531         *link = NULL;
10532
10533         /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
10534         if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
10535                 return 0;
10536
10537         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
10538         if (opts.retprobe)
10539                 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
10540         else
10541                 func_name = prog->sec_name + sizeof("kprobe/") - 1;
10542
10543         n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
10544         if (n < 1) {
10545                 pr_warn("kprobe name is invalid: %s\n", func_name);
10546                 return -EINVAL;
10547         }
10548         if (opts.retprobe && offset != 0) {
10549                 free(func);
10550                 pr_warn("kretprobes do not support offset specification\n");
10551                 return -EINVAL;
10552         }
10553
10554         opts.offset = offset;
10555         *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
10556         free(func);
10557         return libbpf_get_error(*link);
10558 }
10559
10560 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10561 {
10562         LIBBPF_OPTS(bpf_ksyscall_opts, opts);
10563         const char *syscall_name;
10564
10565         *link = NULL;
10566
10567         /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
10568         if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
10569                 return 0;
10570
10571         opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
10572         if (opts.retprobe)
10573                 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
10574         else
10575                 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
10576
10577         *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
10578         return *link ? 0 : -errno;
10579 }
10580
10581 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
10582 {
10583         LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
10584         const char *spec;
10585         char *pattern;
10586         int n;
10587
10588         *link = NULL;
10589
10590         /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
10591         if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
10592             strcmp(prog->sec_name, "kretprobe.multi") == 0)
10593                 return 0;
10594
10595         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
10596         if (opts.retprobe)
10597                 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
10598         else
10599                 spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
10600
10601         n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
10602         if (n < 1) {
10603                 pr_warn("kprobe multi pattern is invalid: %s\n", spec);
10604                 return -EINVAL;
10605         }
10606
10607         *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
10608         free(pattern);
10609         return libbpf_get_error(*link);
10610 }
10611
10612 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
10613                                          const char *binary_path, uint64_t offset)
10614 {
10615         int i;
10616
10617         snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
10618
10619         /* sanitize binary_path in the probe name */
10620         for (i = 0; buf[i]; i++) {
10621                 if (!isalnum(buf[i]))
10622                         buf[i] = '_';
10623         }
10624 }
10625
10626 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
10627                                           const char *binary_path, size_t offset)
10628 {
10629         return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
10630                               retprobe ? 'r' : 'p',
10631                               retprobe ? "uretprobes" : "uprobes",
10632                               probe_name, binary_path, offset);
10633 }
10634
10635 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
10636 {
10637         return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
10638                               retprobe ? "uretprobes" : "uprobes", probe_name);
10639 }
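
/* For illustration (names and offsets hypothetical), with a sanitized
 * probe_name of "libbpf_1234__usr_lib_libc_so_6_0x9a1c0",
 * add_uprobe_event_legacy() appends a line like
 *
 *	p:uprobes/libbpf_1234__usr_lib_libc_so_6_0x9a1c0 /usr/lib/libc.so.6:0x9a1c0
 *
 * to <tracefs>/uprobe_events; the retprobe variant uses 'r:' and the
 * "uretprobes" group instead.
 */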
10640
10641 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
10642 {
10643         char file[512];
10644
10645         snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10646                  tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
10647
10648         return parse_uint_from_file(file, "%d\n");
10649 }
10650
10651 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
10652                                          const char *binary_path, size_t offset, int pid)
10653 {
10654         const size_t attr_sz = sizeof(struct perf_event_attr);
10655         struct perf_event_attr attr;
10656         int type, pfd, err;
10657
10658         err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
10659         if (err < 0) {
10660                 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
10661                         binary_path, (size_t)offset, err);
10662                 return err;
10663         }
10664         type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
10665         if (type < 0) {
10666                 err = type;
10667                 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
10668                         binary_path, offset, err);
10669                 goto err_clean_legacy;
10670         }
10671
10672         memset(&attr, 0, attr_sz);
10673         attr.size = attr_sz;
10674         attr.config = type;
10675         attr.type = PERF_TYPE_TRACEPOINT;
10676
10677         pfd = syscall(__NR_perf_event_open, &attr,
10678                       pid < 0 ? -1 : pid, /* pid */
10679                       pid == -1 ? 0 : -1, /* cpu */
10680                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10681         if (pfd < 0) {
10682                 err = -errno;
10683                 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
10684                 goto err_clean_legacy;
10685         }
10686         return pfd;
10687
10688 err_clean_legacy:
10689         /* Clear the newly added legacy uprobe_event */
10690         remove_uprobe_event_legacy(probe_name, retprobe);
10691         return err;
10692 }
10693
10694 /* Return next ELF section of sh_type after scn, or first of that type if scn is NULL. */
10695 static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
10696 {
10697         while ((scn = elf_nextscn(elf, scn)) != NULL) {
10698                 GElf_Shdr sh;
10699
10700                 if (!gelf_getshdr(scn, &sh))
10701                         continue;
10702                 if (sh.sh_type == sh_type)
10703                         return scn;
10704         }
10705         return NULL;
10706 }
10707
10708 /* Find offset of function name in the provided ELF object. "binary_path" is
10709  * the path to the ELF binary represented by "elf", and is only used for
10710  * error reporting. "name" matches symbol name or name@@LIB for library
10711  * functions.
10712  */
10713 static long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
10714 {
10715         int i, sh_types[2] = { SHT_DYNSYM, SHT_SYMTAB };
10716         bool is_shared_lib, is_name_qualified;
10717         long ret = -ENOENT;
10718         size_t name_len;
10719         GElf_Ehdr ehdr;
10720
10721         if (!gelf_getehdr(elf, &ehdr)) {
10722                 pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
10723                 ret = -LIBBPF_ERRNO__FORMAT;
10724                 goto out;
10725         }
10726         /* for shared lib case, we do not need to calculate relative offset */
10727         is_shared_lib = ehdr.e_type == ET_DYN;
10728
10729         name_len = strlen(name);
10730         /* Does name specify "@@LIB"? */
10731         is_name_qualified = strstr(name, "@@") != NULL;
10732
10733         /* Search SHT_DYNSYM, SHT_SYMTAB for symbol. This search order is used because if
10734          * a binary is stripped, it may only have SHT_DYNSYM, and a fully-statically
10735  * linked binary may not have SHT_DYNSYM, so absence of a section should not be
10736          * reported as a warning/error.
10737          */
10738         for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
10739                 size_t nr_syms, strtabidx, idx;
10740                 Elf_Data *symbols = NULL;
10741                 Elf_Scn *scn = NULL;
10742                 int last_bind = -1;
10743                 const char *sname;
10744                 GElf_Shdr sh;
10745
10746                 scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
10747                 if (!scn) {
10748                         pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
10749                                  binary_path);
10750                         continue;
10751                 }
10752                 if (!gelf_getshdr(scn, &sh))
10753                         continue;
10754                 strtabidx = sh.sh_link;
10755                 symbols = elf_getdata(scn, 0);
10756                 if (!symbols) {
10757                         pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
10758                                 binary_path, elf_errmsg(-1));
10759                         ret = -LIBBPF_ERRNO__FORMAT;
10760                         goto out;
10761                 }
10762                 nr_syms = symbols->d_size / sh.sh_entsize;
10763
10764                 for (idx = 0; idx < nr_syms; idx++) {
10765                         int curr_bind;
10766                         GElf_Sym sym;
10767                         Elf_Scn *sym_scn;
10768                         GElf_Shdr sym_sh;
10769
10770                         if (!gelf_getsym(symbols, idx, &sym))
10771                                 continue;
10772
10773                         if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
10774                                 continue;
10775
10776                         sname = elf_strptr(elf, strtabidx, sym.st_name);
10777                         if (!sname)
10778                                 continue;
10779
10780                         curr_bind = GELF_ST_BIND(sym.st_info);
10781
10782                         /* User can specify func, func@@LIB or func@@LIB_VERSION. */
10783                         if (strncmp(sname, name, name_len) != 0)
10784                                 continue;
10785                         /* ...but we don't want a search for "foo" to match "foo2" also, so any
10786                          * additional characters in sname should be of the form "@@LIB".
10787                          */
10788                         if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
10789                                 continue;
10790
10791                         if (ret >= 0) {
10792                                 /* handle multiple matches */
10793                                 if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
10794                                         /* Only accept one non-weak bind. */
10795                                         pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
10796                                                 sname, name, binary_path);
10797                                         ret = -LIBBPF_ERRNO__FORMAT;
10798                                         goto out;
10799                                 } else if (curr_bind == STB_WEAK) {
10800                                         /* already have a non-weak bind, and
10801                                          * this is a weak bind, so ignore.
10802                                          */
10803                                         continue;
10804                                 }
10805                         }
10806
10807                         /* Transform symbol's virtual address (absolute for
10808                          * binaries and relative for shared libs) into file
10809                          * offset, which is what kernel is expecting for
10810                          * uprobe/uretprobe attachment.
10811                          * See Documentation/trace/uprobetracer.rst for more
10812                          * details.
10813                          * This is done by looking up symbol's containing
10814                          * section's header and using its virtual address
10815                          * (sh_addr) and corresponding file offset (sh_offset)
10816                          * to transform sym.st_value (virtual address) into
10817                          * desired final file offset.
10818                          */
10819                         sym_scn = elf_getscn(elf, sym.st_shndx);
10820                         if (!sym_scn)
10821                                 continue;
10822                         if (!gelf_getshdr(sym_scn, &sym_sh))
10823                                 continue;
10824
10825                         ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
10826                         last_bind = curr_bind;
10827                 }
10828                 if (ret > 0)
10829                         break;
10830         }
10831
10832         if (ret > 0) {
10833                 pr_debug("elf: symbol address match for '%s' in '%s': 0x%lx\n", name, binary_path,
10834                          ret);
10835         } else {
10836                 if (ret == 0) {
10837                         pr_warn("elf: '%s' is 0 in symtab for '%s': %s\n", name, binary_path,
10838                                 is_shared_lib ? "should not be 0 in a shared library" :
10839                                                 "try using shared library path instead");
10840                         ret = -ENOENT;
10841                 } else {
10842                         pr_warn("elf: failed to find symbol '%s' in '%s'\n", name, binary_path);
10843                 }
10844         }
10845 out:
10846         return ret;
10847 }
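
/* Worked example of the st_value -> file offset conversion above (numbers
 * are made up): if a symbol has st_value 0x41200 and lives in a section
 * with sh_addr 0x40000 and sh_offset 0x30000, the resulting uprobe file
 * offset is 0x41200 - 0x40000 + 0x30000 = 0x31200.
 */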
10848
10849 /* Find offset of function name in ELF object specified by path. "name" matches
10850  * symbol name or name@@LIB for library functions.
10851  */
10852 static long elf_find_func_offset_from_file(const char *binary_path, const char *name)
10853 {
10854         char errmsg[STRERR_BUFSIZE];
10855         long ret = -ENOENT;
10856         Elf *elf;
10857         int fd;
10858
10859         fd = open(binary_path, O_RDONLY | O_CLOEXEC);
10860         if (fd < 0) {
10861                 ret = -errno;
10862                 pr_warn("failed to open %s: %s\n", binary_path,
10863                         libbpf_strerror_r(ret, errmsg, sizeof(errmsg)));
10864                 return ret;
10865         }
10866         elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
10867         if (!elf) {
10868                 pr_warn("elf: could not read elf from %s: %s\n", binary_path, elf_errmsg(-1));
10869                 close(fd);
10870                 return -LIBBPF_ERRNO__FORMAT;
10871         }
10872
10873         ret = elf_find_func_offset(elf, binary_path, name);
10874         elf_end(elf);
10875         close(fd);
10876         return ret;
10877 }
10878
10879 /* Find offset of function name in archive specified by path. Currently
10880  * supported are .zip files that do not compress their contents, as used on
10881  * Android in the form of APKs, for example. "file_name" is the name of the ELF
10882  * file inside the archive. "func_name" matches symbol name or name@@LIB for
10883  * library functions.
10884  *
10885  * An overview of the APK format is provided here:
10886  * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
10887  */
10888 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
10889                                               const char *func_name)
10890 {
10891         struct zip_archive *archive;
10892         struct zip_entry entry;
10893         long ret;
10894         Elf *elf;
10895
10896         archive = zip_archive_open(archive_path);
10897         if (IS_ERR(archive)) {
10898                 ret = PTR_ERR(archive);
10899                 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
10900                 return ret;
10901         }
10902
10903         ret = zip_archive_find_entry(archive, file_name, &entry);
10904         if (ret) {
10905                 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
10906                         archive_path, ret);
10907                 goto out;
10908         }
10909         pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
10910                  (unsigned long)entry.data_offset);
10911
10912         if (entry.compression) {
10913                 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
10914                         archive_path);
10915                 ret = -LIBBPF_ERRNO__FORMAT;
10916                 goto out;
10917         }
10918
10919         elf = elf_memory((void *)entry.data, entry.data_length);
10920         if (!elf) {
10921                 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
10922                         elf_errmsg(-1));
10923                 ret = -LIBBPF_ERRNO__LIBELF;
10924                 goto out;
10925         }
10926
10927         ret = elf_find_func_offset(elf, file_name, func_name);
10928         if (ret > 0) {
10929                 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
10930                          func_name, file_name, archive_path, entry.data_offset, ret,
10931                          ret + entry.data_offset);
10932                 ret += entry.data_offset;
10933         }
10934         elf_end(elf);
10935
10936 out:
10937         zip_archive_close(archive);
10938         return ret;
10939 }
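/* Illustrative sketch (not part of libbpf) of how the archive variant above
 * might be called; the APK path, member name, and function name below are
 * hypothetical:
 *
 *   long off;
 *
 *   off = elf_find_func_offset_from_archive("/data/app/foo.apk",
 *                                           "lib/arm64-v8a/libfoo.so",
 *                                           "foo_func");
 *
 * On success, "off" already includes the member's data offset within the
 * archive, so it can be used directly as a uprobe offset against the .apk.
 */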
10940
10941 static const char *arch_specific_lib_paths(void)
10942 {
10943         /*
10944          * Based on https://packages.debian.org/sid/libc6.
10945          *
10946          * Assume that the traced program is built for the same architecture
10947          * as libbpf, which should cover the vast majority of cases.
10948          */
10949 #if defined(__x86_64__)
10950         return "/lib/x86_64-linux-gnu";
10951 #elif defined(__i386__)
10952         return "/lib/i386-linux-gnu";
10953 #elif defined(__s390x__)
10954         return "/lib/s390x-linux-gnu";
10955 #elif defined(__s390__)
10956         return "/lib/s390-linux-gnu";
10957 #elif defined(__arm__) && defined(__SOFTFP__)
10958         return "/lib/arm-linux-gnueabi";
10959 #elif defined(__arm__) && !defined(__SOFTFP__)
10960         return "/lib/arm-linux-gnueabihf";
10961 #elif defined(__aarch64__)
10962         return "/lib/aarch64-linux-gnu";
10963 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
10964         return "/lib/mips64el-linux-gnuabi64";
10965 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
10966         return "/lib/mipsel-linux-gnu";
10967 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
10968         return "/lib/powerpc64le-linux-gnu";
10969 #elif defined(__sparc__) && defined(__arch64__)
10970         return "/lib/sparc64-linux-gnu";
10971 #elif defined(__riscv) && __riscv_xlen == 64
10972         return "/lib/riscv64-linux-gnu";
10973 #else
10974         return NULL;
10975 #endif
10976 }
10977
10978 /* Get full path to program/shared library. */
10979 static int resolve_full_path(const char *file, char *result, size_t result_sz)
10980 {
10981         const char *search_paths[3] = {};
10982         int i, perm;
10983
10984         if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
10985                 search_paths[0] = getenv("LD_LIBRARY_PATH");
10986                 search_paths[1] = "/usr/lib64:/usr/lib";
10987                 search_paths[2] = arch_specific_lib_paths();
10988                 perm = R_OK;
10989         } else {
10990                 search_paths[0] = getenv("PATH");
10991                 search_paths[1] = "/usr/bin:/usr/sbin";
10992                 perm = R_OK | X_OK;
10993         }
10994
10995         for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
10996                 const char *s;
10997
10998                 if (!search_paths[i])
10999                         continue;
11000                 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
11001                         char *next_path;
11002                         int seg_len;
11003
11004                         if (s[0] == ':')
11005                                 s++;
11006                         next_path = strchr(s, ':');
11007                         seg_len = next_path ? next_path - s : strlen(s);
11008                         if (!seg_len)
11009                                 continue;
11010                         snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
11011                         /* ensure it has required permissions */
11012                         if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
11013                                 continue;
11014                         pr_debug("resolved '%s' to '%s'\n", file, result);
11015                         return 0;
11016                 }
11017         }
11018         return -ENOENT;
11019 }
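/* Illustrative resolutions (assuming typical distro layouts): a library name
 * like "libc.so.6" is searched in LD_LIBRARY_PATH, then /usr/lib64:/usr/lib,
 * then the arch-specific directory, and might resolve to
 * "/lib/x86_64-linux-gnu/libc.so.6"; a program name like "bash" is searched
 * in PATH, then /usr/bin:/usr/sbin, and additionally must be executable.
 */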
11020
11021 LIBBPF_API struct bpf_link *
11022 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
11023                                 const char *binary_path, size_t func_offset,
11024                                 const struct bpf_uprobe_opts *opts)
11025 {
11026         const char *archive_path = NULL, *archive_sep = NULL;
11027         char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
11028         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11029         enum probe_attach_mode attach_mode;
11030         char full_path[PATH_MAX];
11031         struct bpf_link *link;
11032         size_t ref_ctr_off;
11033         int pfd, err;
11034         bool retprobe, legacy;
11035         const char *func_name;
11036
11037         if (!OPTS_VALID(opts, bpf_uprobe_opts))
11038                 return libbpf_err_ptr(-EINVAL);
11039
11040         attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11041         retprobe = OPTS_GET(opts, retprobe, false);
11042         ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
11043         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11044
11045         if (!binary_path)
11046                 return libbpf_err_ptr(-EINVAL);
11047
11048         /* Check if "binary_path" refers to an archive. */
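        /* e.g. "/data/app/foo.apk!/lib/arm64-v8a/libfoo.so" (illustrative):
         * everything before "!/" is the archive, the rest names the ELF
         * member inside it
         */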
11049         archive_sep = strstr(binary_path, "!/");
11050         if (archive_sep) {
11051                 full_path[0] = '\0';
11052                 libbpf_strlcpy(full_path, binary_path,
11053                                min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
11054                 archive_path = full_path;
11055                 binary_path = archive_sep + 2;
11056         } else if (!strchr(binary_path, '/')) {
11057                 err = resolve_full_path(binary_path, full_path, sizeof(full_path));
11058                 if (err) {
11059                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11060                                 prog->name, binary_path, err);
11061                         return libbpf_err_ptr(err);
11062                 }
11063                 binary_path = full_path;
11064         }
11065         func_name = OPTS_GET(opts, func_name, NULL);
11066         if (func_name) {
11067                 long sym_off;
11068
11069                 if (archive_path) {
11070                         sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
11071                                                                     func_name);
11072                         binary_path = archive_path;
11073                 } else {
11074                         sym_off = elf_find_func_offset_from_file(binary_path, func_name);
11075                 }
11076                 if (sym_off < 0)
11077                         return libbpf_err_ptr(sym_off);
11078                 func_offset += sym_off;
11079         }
11080
11081         legacy = determine_uprobe_perf_type() < 0;
11082         switch (attach_mode) {
11083         case PROBE_ATTACH_MODE_LEGACY:
11084                 legacy = true;
11085                 pe_opts.force_ioctl_attach = true;
11086                 break;
11087         case PROBE_ATTACH_MODE_PERF:
11088                 if (legacy)
11089                         return libbpf_err_ptr(-ENOTSUP);
11090                 pe_opts.force_ioctl_attach = true;
11091                 break;
11092         case PROBE_ATTACH_MODE_LINK:
11093                 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11094                         return libbpf_err_ptr(-ENOTSUP);
11095                 break;
11096         case PROBE_ATTACH_MODE_DEFAULT:
11097                 break;
11098         default:
11099                 return libbpf_err_ptr(-EINVAL);
11100         }
11101
11102         if (!legacy) {
11103                 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
11104                                             func_offset, pid, ref_ctr_off);
11105         } else {
11106                 char probe_name[PATH_MAX + 64];
11107
11108                 if (ref_ctr_off)
11109                         return libbpf_err_ptr(-EINVAL);
11110
11111                 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
11112                                              binary_path, func_offset);
11113
11114                 legacy_probe = strdup(probe_name);
11115                 if (!legacy_probe)
11116                         return libbpf_err_ptr(-ENOMEM);
11117
11118                 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
11119                                                     binary_path, func_offset, pid);
11120         }
11121         if (pfd < 0) {
11122                 err = -errno;
11123                 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
11124                         prog->name, retprobe ? "uretprobe" : "uprobe",
11125                         binary_path, func_offset,
11126                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11127                 goto err_out;
11128         }
11129
11130         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11131         err = libbpf_get_error(link);
11132         if (err) {
11133                 close(pfd);
11134                 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
11135                         prog->name, retprobe ? "uretprobe" : "uprobe",
11136                         binary_path, func_offset,
11137                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11138                 goto err_clean_legacy;
11139         }
11140         if (legacy) {
11141                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11142
11143                 perf_link->legacy_probe_name = legacy_probe;
11144                 perf_link->legacy_is_kprobe = false;
11145                 perf_link->legacy_is_retprobe = retprobe;
11146         }
11147         return link;
11148
11149 err_clean_legacy:
11150         if (legacy)
11151                 remove_uprobe_event_legacy(legacy_probe, retprobe);
11152 err_out:
11153         free(legacy_probe);
11154         return libbpf_err_ptr(err);
11155 }
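/* Illustrative usage sketch of bpf_program__attach_uprobe_opts() above;
 * "prog" and the traced function are hypothetical and error handling is
 * elided:
 *
 *   LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts, .func_name = "malloc");
 *   struct bpf_link *link;
 *
 *   link = bpf_program__attach_uprobe_opts(prog, -1, "libc.so.6", 0,
 *                                          &uprobe_opts);
 *
 * pid == -1 attaches to all processes; "libc.so.6" contains no '/', so it
 * is resolved to a full path via resolve_full_path() as described above.
 */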
11156
11157 /* Format of u[ret]probe section definition supporting auto-attach:
11158  * u[ret]probe/binary:function[+offset]
11159  *
11160  * binary can be an absolute/relative path or a filename; the latter is resolved to a
11161  * full binary path via bpf_program__attach_uprobe_opts.
11162  *
11163  * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
11164  * specified (and auto-attach is not possible) or the above format is specified for
11165  * auto-attach.
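 *
 * Examples this format accepts (illustrative; the binaries and functions
 * named are hypothetical):
 *   SEC("uprobe/libc.so.6:malloc")           - attach by function name
 *   SEC("uretprobe//usr/bin/bash:readline")  - absolute path, return probe
 *   SEC("uprobe/libc.so.6:free+0x10")        - function name plus offset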
11166  */
11167 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11168 {
11169         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
11170         char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
11171         int n, ret = -EINVAL;
11172         long offset = 0;
11173
11174         *link = NULL;
11175
11176         n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li",
11177                    &probe_type, &binary_path, &func_name, &offset);
11178         switch (n) {
11179         case 1:
11180                 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
11181                 ret = 0;
11182                 break;
11183         case 2:
11184                 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
11185                         prog->name, prog->sec_name);
11186                 break;
11187         case 3:
11188         case 4:
11189                 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
11190                                 strcmp(probe_type, "uretprobe.s") == 0;
11191                 if (opts.retprobe && offset != 0) {
11192                         pr_warn("prog '%s': uretprobes do not support offset specification\n",
11193                                 prog->name);
11194                         break;
11195                 }
11196                 opts.func_name = func_name;
11197                 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
11198                 ret = libbpf_get_error(*link);
11199                 break;
11200         default:
11201                 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11202                         prog->sec_name);
11203                 break;
11204         }
11205         free(probe_type);
11206         free(binary_path);
11207         free(func_name);
11208
11209         return ret;
11210 }
11211
11212 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
11213                                             bool retprobe, pid_t pid,
11214                                             const char *binary_path,
11215                                             size_t func_offset)
11216 {
11217         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
11218
11219         return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
11220 }
11221
11222 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
11223                                           pid_t pid, const char *binary_path,
11224                                           const char *usdt_provider, const char *usdt_name,
11225                                           const struct bpf_usdt_opts *opts)
11226 {
11227         char resolved_path[512];
11228         struct bpf_object *obj = prog->obj;
11229         struct bpf_link *link;
11230         __u64 usdt_cookie;
11231         int err;
11232
11233         if (!OPTS_VALID(opts, bpf_usdt_opts))
11234                 return libbpf_err_ptr(-EINVAL);
11235
11236         if (bpf_program__fd(prog) < 0) {
11237                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
11238                         prog->name);
11239                 return libbpf_err_ptr(-EINVAL);
11240         }
11241
11242         if (!binary_path)
11243                 return libbpf_err_ptr(-EINVAL);
11244
11245         if (!strchr(binary_path, '/')) {
11246                 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
11247                 if (err) {
11248                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11249                                 prog->name, binary_path, err);
11250                         return libbpf_err_ptr(err);
11251                 }
11252                 binary_path = resolved_path;
11253         }
11254
11255         /* USDT manager is instantiated lazily on first USDT attach. It will
11256          * be destroyed together with BPF object in bpf_object__close().
11257          */
11258         if (IS_ERR(obj->usdt_man))
11259                 return libbpf_ptr(obj->usdt_man);
11260         if (!obj->usdt_man) {
11261                 obj->usdt_man = usdt_manager_new(obj);
11262                 if (IS_ERR(obj->usdt_man))
11263                         return libbpf_ptr(obj->usdt_man);
11264         }
11265
11266         usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
11267         link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
11268                                         usdt_provider, usdt_name, usdt_cookie);
11269         err = libbpf_get_error(link);
11270         if (err)
11271                 return libbpf_err_ptr(err);
11272         return link;
11273 }
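/* Illustrative usage sketch of bpf_program__attach_usdt() above; the binary
 * path, provider, and probe name are hypothetical:
 *
 *   LIBBPF_OPTS(bpf_usdt_opts, usdt_opts, .usdt_cookie = 0x100);
 *   struct bpf_link *link;
 *
 *   link = bpf_program__attach_usdt(prog, -1, "/usr/sbin/mysqld",
 *                                   "mysql", "query__start", &usdt_opts);
 */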
11274
11275 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11276 {
11277         char *path = NULL, *provider = NULL, *name = NULL;
11278         const char *sec_name;
11279         int n, err;
11280
11281         sec_name = bpf_program__section_name(prog);
11282         if (strcmp(sec_name, "usdt") == 0) {
11283                 /* no auto-attach for just SEC("usdt") */
11284                 *link = NULL;
11285                 return 0;
11286         }
11287
11288         n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
11289         if (n != 3) {
11290                 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
11291                         sec_name);
11292                 err = -EINVAL;
11293         } else {
11294                 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
11295                                                  provider, name, NULL);
11296                 err = libbpf_get_error(*link);
11297         }
11298         free(path);
11299         free(provider);
11300         free(name);
11301         return err;
11302 }
11303
11304 static int determine_tracepoint_id(const char *tp_category,
11305                                    const char *tp_name)
11306 {
11307         char file[PATH_MAX];
11308         int ret;
11309
11310         ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11311                        tracefs_path(), tp_category, tp_name);
11312         if (ret < 0)
11313                 return -errno;
11314         if (ret >= sizeof(file)) {
11315                 pr_debug("tracepoint %s/%s path is too long\n",
11316                          tp_category, tp_name);
11317                 return -E2BIG;
11318         }
11319         return parse_uint_from_file(file, "%d\n");
11320 }
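/* With a typical tracefs mount, the file read above looks like, e.g.
 * (illustrative category/name):
 *
 *   /sys/kernel/tracing/events/sched/sched_switch/id
 */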
11321
11322 static int perf_event_open_tracepoint(const char *tp_category,
11323                                       const char *tp_name)
11324 {
11325         const size_t attr_sz = sizeof(struct perf_event_attr);
11326         struct perf_event_attr attr;
11327         char errmsg[STRERR_BUFSIZE];
11328         int tp_id, pfd, err;
11329
11330         tp_id = determine_tracepoint_id(tp_category, tp_name);
11331         if (tp_id < 0) {
11332                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
11333                         tp_category, tp_name,
11334                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
11335                 return tp_id;
11336         }
11337
11338         memset(&attr, 0, attr_sz);
11339         attr.type = PERF_TYPE_TRACEPOINT;
11340         attr.size = attr_sz;
11341         attr.config = tp_id;
11342
11343         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
11344                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11345         if (pfd < 0) {
11346                 err = -errno;
11347                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
11348                         tp_category, tp_name,
11349                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11350                 return err;
11351         }
11352         return pfd;
11353 }
11354
11355 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
11356                                                      const char *tp_category,
11357                                                      const char *tp_name,
11358                                                      const struct bpf_tracepoint_opts *opts)
11359 {
11360         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11361         char errmsg[STRERR_BUFSIZE];
11362         struct bpf_link *link;
11363         int pfd, err;
11364
11365         if (!OPTS_VALID(opts, bpf_tracepoint_opts))
11366                 return libbpf_err_ptr(-EINVAL);
11367
11368         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11369
11370         pfd = perf_event_open_tracepoint(tp_category, tp_name);
11371         if (pfd < 0) {
11372                 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
11373                         prog->name, tp_category, tp_name,
11374                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11375                 return libbpf_err_ptr(pfd);
11376         }
11377         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11378         err = libbpf_get_error(link);
11379         if (err) {
11380                 close(pfd);
11381                 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
11382                         prog->name, tp_category, tp_name,
11383                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11384                 return libbpf_err_ptr(err);
11385         }
11386         return link;
11387 }
11388
11389 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
11390                                                 const char *tp_category,
11391                                                 const char *tp_name)
11392 {
11393         return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
11394 }
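/* Illustrative usage of bpf_program__attach_tracepoint() above, attaching a
 * (hypothetical) program to the sched/sched_switch tracepoint:
 *
 *   struct bpf_link *link;
 *
 *   link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
 */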
11395
11396 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11397 {
11398         char *sec_name, *tp_cat, *tp_name;
11399
11400         *link = NULL;
11401
11402         /* no auto-attach for SEC("tp") or SEC("tracepoint") */
11403         if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
11404                 return 0;
11405
11406         sec_name = strdup(prog->sec_name);
11407         if (!sec_name)
11408                 return -ENOMEM;
11409
11410         /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
11411         if (str_has_pfx(prog->sec_name, "tp/"))
11412                 tp_cat = sec_name + sizeof("tp/") - 1;
11413         else
11414                 tp_cat = sec_name + sizeof("tracepoint/") - 1;
11415         tp_name = strchr(tp_cat, '/');
11416         if (!tp_name) {
11417                 free(sec_name);
11418                 return -EINVAL;
11419         }
11420         *tp_name = '\0';
11421         tp_name++;
11422
11423         *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
11424         free(sec_name);
11425         return libbpf_get_error(*link);
11426 }
11427
11428 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
11429                                                     const char *tp_name)
11430 {
11431         char errmsg[STRERR_BUFSIZE];
11432         struct bpf_link *link;
11433         int prog_fd, pfd;
11434
11435         prog_fd = bpf_program__fd(prog);
11436         if (prog_fd < 0) {
11437                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11438                 return libbpf_err_ptr(-EINVAL);
11439         }
11440
11441         link = calloc(1, sizeof(*link));
11442         if (!link)
11443                 return libbpf_err_ptr(-ENOMEM);
11444         link->detach = &bpf_link__detach_fd;
11445
11446         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
11447         if (pfd < 0) {
11448                 pfd = -errno;
11449                 free(link);
11450                 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
11451                         prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11452                 return libbpf_err_ptr(pfd);
11453         }
11454         link->fd = pfd;
11455         return link;
11456 }
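/* Illustrative usage of bpf_program__attach_raw_tracepoint() above; unlike
 * regular tracepoints, only the tracepoint name is needed, no category:
 *
 *   struct bpf_link *link;
 *
 *   link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
 */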
11457
11458 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11459 {
11460         static const char *const prefixes[] = {
11461                 "raw_tp",
11462                 "raw_tracepoint",
11463                 "raw_tp.w",
11464                 "raw_tracepoint.w",
11465         };
11466         size_t i;
11467         const char *tp_name = NULL;
11468
11469         *link = NULL;
11470
11471         for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
11472                 size_t pfx_len;
11473
11474                 if (!str_has_pfx(prog->sec_name, prefixes[i]))
11475                         continue;
11476
11477                 pfx_len = strlen(prefixes[i]);
11478                 /* no auto-attach for the bare case, e.g., SEC("raw_tp") */
11479                 if (prog->sec_name[pfx_len] == '\0')
11480                         return 0;
11481
11482                 if (prog->sec_name[pfx_len] != '/')
11483                         continue;
11484
11485                 tp_name = prog->sec_name + pfx_len + 1;
11486                 break;
11487         }
11488
11489         if (!tp_name) {
11490                 pr_warn("prog '%s': invalid section name '%s'\n",
11491                         prog->name, prog->sec_name);
11492                 return -EINVAL;
11493         }
11494
11495         *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
11496         return libbpf_get_error(*link);
11497 }
11498
11499 /* Common logic for all BPF program types that attach to a btf_id */
11500 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
11501                                                    const struct bpf_trace_opts *opts)
11502 {
11503         LIBBPF_OPTS(bpf_link_create_opts, link_opts);
11504         char errmsg[STRERR_BUFSIZE];
11505         struct bpf_link *link;
11506         int prog_fd, pfd;
11507
11508         if (!OPTS_VALID(opts, bpf_trace_opts))
11509                 return libbpf_err_ptr(-EINVAL);
11510
11511         prog_fd = bpf_program__fd(prog);
11512         if (prog_fd < 0) {
11513                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11514                 return libbpf_err_ptr(-EINVAL);
11515         }
11516
11517         link = calloc(1, sizeof(*link));
11518         if (!link)
11519                 return libbpf_err_ptr(-ENOMEM);
11520         link->detach = &bpf_link__detach_fd;
11521
11522         /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
11523         link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
11524         pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
11525         if (pfd < 0) {
11526                 pfd = -errno;
11527                 free(link);
11528                 pr_warn("prog '%s': failed to attach: %s\n",
11529                         prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11530                 return libbpf_err_ptr(pfd);
11531         }
11532         link->fd = pfd;
11533         return link;
11534 }
11535
11536 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
11537 {
11538         return bpf_program__attach_btf_id(prog, NULL);
11539 }
11540
11541 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
11542                                                 const struct bpf_trace_opts *opts)
11543 {
11544         return bpf_program__attach_btf_id(prog, opts);
11545 }
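/* Illustrative sketch of passing a BPF cookie via bpf_trace_opts when
 * attaching (hypothetical cookie value):
 *
 *   LIBBPF_OPTS(bpf_trace_opts, trace_opts, .cookie = 0xcafe);
 *   struct bpf_link *link;
 *
 *   link = bpf_program__attach_trace_opts(prog, &trace_opts);
 */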
11546
11547 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
11548 {
11549         return bpf_program__attach_btf_id(prog, NULL);
11550 }
11551
11552 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11553 {
11554         *link = bpf_program__attach_trace(prog);
11555         return libbpf_get_error(*link);
11556 }
11557
11558 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11559 {
11560         *link = bpf_program__attach_lsm(prog);
11561         return libbpf_get_error(*link);
11562 }
11563
11564 static struct bpf_link *
11565 bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
11566                        const char *target_name)
11567 {
11568         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
11569                             .target_btf_id = btf_id);
11570         enum bpf_attach_type attach_type;
11571         char errmsg[STRERR_BUFSIZE];
11572         struct bpf_link *link;
11573         int prog_fd, link_fd;
11574
11575         prog_fd = bpf_program__fd(prog);
11576         if (prog_fd < 0) {
11577                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11578                 return libbpf_err_ptr(-EINVAL);
11579         }
11580
11581         link = calloc(1, sizeof(*link));
11582         if (!link)
11583                 return libbpf_err_ptr(-ENOMEM);
11584         link->detach = &bpf_link__detach_fd;
11585
11586         attach_type = bpf_program__expected_attach_type(prog);
11587         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, &opts);
11588         if (link_fd < 0) {
11589                 link_fd = -errno;
11590                 free(link);
11591                 pr_warn("prog '%s': failed to attach to %s: %s\n",
11592                         prog->name, target_name,
11593                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11594                 return libbpf_err_ptr(link_fd);
11595         }
11596         link->fd = link_fd;
11597         return link;
11598 }
11599
11600 struct bpf_link *
11601 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
11602 {
11603         return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
11604 }
11605
11606 struct bpf_link *
11607 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
11608 {
11609         return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
11610 }
11611
11612 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
11613 {
11614         /* target_fd/target_ifindex use the same field in LINK_CREATE */
11615         return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
11616 }
11617
11618 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
11619                                               int target_fd,
11620                                               const char *attach_func_name)
11621 {
11622         int btf_id;
11623
11624         if (!!target_fd != !!attach_func_name) {
11625                 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
11626                         prog->name);
11627                 return libbpf_err_ptr(-EINVAL);
11628         }
11629
11630         if (prog->type != BPF_PROG_TYPE_EXT) {
11631                 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace",
11632                         prog->name);
11633                 return libbpf_err_ptr(-EINVAL);
11634         }
11635
11636         if (target_fd) {
11637                 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
11638                 if (btf_id < 0)
11639                         return libbpf_err_ptr(btf_id);
11640
11641                 return bpf_program__attach_fd(prog, target_fd, btf_id, "freplace");
11642         } else {
11643                 /* no target, so use raw_tracepoint_open for compatibility
11644                  * with old kernels
11645                  */
11646                 return bpf_program__attach_trace(prog);
11647         }
11648 }
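/* Illustrative usage of bpf_program__attach_freplace() above; target_prog_fd
 * is the FD of an already loaded target program and "xdp_subprog" is a
 * hypothetical function within it to be replaced:
 *
 *   struct bpf_link *link;
 *
 *   link = bpf_program__attach_freplace(prog, target_prog_fd, "xdp_subprog");
 */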
11649
11650 struct bpf_link *
11651 bpf_program__attach_iter(const struct bpf_program *prog,
11652                          const struct bpf_iter_attach_opts *opts)
11653 {
11654         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
11655         char errmsg[STRERR_BUFSIZE];
11656         struct bpf_link *link;
11657         int prog_fd, link_fd;
11658         __u32 target_fd = 0;
11659
11660         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
11661                 return libbpf_err_ptr(-EINVAL);
11662
11663         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
11664         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
11665
11666         prog_fd = bpf_program__fd(prog);
11667         if (prog_fd < 0) {
11668                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11669                 return libbpf_err_ptr(-EINVAL);
11670         }
11671
11672         link = calloc(1, sizeof(*link));
11673         if (!link)
11674                 return libbpf_err_ptr(-ENOMEM);
11675         link->detach = &bpf_link__detach_fd;
11676
11677         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
11678                                   &link_create_opts);
11679         if (link_fd < 0) {
11680                 link_fd = -errno;
11681                 free(link);
11682                 pr_warn("prog '%s': failed to attach to iterator: %s\n",
11683                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
11684                 return libbpf_err_ptr(link_fd);
11685         }
11686         link->fd = link_fd;
11687         return link;
11688 }
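/* Illustrative sketch of attaching a BPF map iterator; "map_fd" is assumed
 * to refer to a map type that supports iteration:
 *
 *   union bpf_iter_link_info linfo;
 *   LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
 *   struct bpf_link *link;
 *
 *   memset(&linfo, 0, sizeof(linfo));
 *   linfo.map.map_fd = map_fd;
 *   iter_opts.link_info = &linfo;
 *   iter_opts.link_info_len = sizeof(linfo);
 *   link = bpf_program__attach_iter(prog, &iter_opts);
 */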
11689
11690 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11691 {
11692         *link = bpf_program__attach_iter(prog, NULL);
11693         return libbpf_get_error(*link);
11694 }
11695
11696 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
11697 {
11698         struct bpf_link *link = NULL;
11699         int err;
11700
11701         if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
11702                 return libbpf_err_ptr(-EOPNOTSUPP);
11703
11704         err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
11705         if (err)
11706                 return libbpf_err_ptr(err);
11707
11708         /* When calling bpf_program__attach() explicitly, auto-attach support
11709          * is expected to work, so a NULL returned link is considered an error.
11710          * This is different for skeleton's attach, see comment in
11711          * bpf_object__attach_skeleton().
11712          */
11713         if (!link)
11714                 return libbpf_err_ptr(-EOPNOTSUPP);
11715
11716         return link;
11717 }
11718
11719 struct bpf_link_struct_ops {
11720         struct bpf_link link;
11721         int map_fd;
11722 };
11723
11724 static int bpf_link__detach_struct_ops(struct bpf_link *link)
11725 {
11726         struct bpf_link_struct_ops *st_link;
11727         __u32 zero = 0;
11728
11729         st_link = container_of(link, struct bpf_link_struct_ops, link);
11730
11731         if (st_link->map_fd < 0)
11732                 /* w/o a real link */
11733                 return bpf_map_delete_elem(link->fd, &zero);
11734
11735         return close(link->fd);
11736 }
11737
11738 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
11739 {
11740         struct bpf_link_struct_ops *link;
11741         __u32 zero = 0;
11742         int err, fd;
11743
11744         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
11745                 return libbpf_err_ptr(-EINVAL);
11746
11747         link = calloc(1, sizeof(*link));
11748         if (!link)
11749                 return libbpf_err_ptr(-ENOMEM);
11750
11751         /* kern_vdata should be prepared during the loading phase. */
11752         err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
11753         /* The update can fail with -EBUSY if the map has already been
11754          * used to create or update a link.  We don't allow updating the
11755          * value of a struct_ops once it is set, which ensures that the
11756          * value never changes.  So it is safe to ignore -EBUSY here.
11757          */
11758         if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
11759                 free(link);
11760                 return libbpf_err_ptr(err);
11761         }
11762
11763         link->link.detach = bpf_link__detach_struct_ops;
11764
11765         if (!(map->def.map_flags & BPF_F_LINK)) {
11766                 /* w/o a real link */
11767                 link->link.fd = map->fd;
11768                 link->map_fd = -1;
11769                 return &link->link;
11770         }
11771
11772         fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
11773         if (fd < 0) {
11774                 free(link);
11775                 return libbpf_err_ptr(fd);
11776         }
11777
11778         link->link.fd = fd;
11779         link->map_fd = map->fd;
11780
11781         return &link->link;
11782 }
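/* Illustrative usage of bpf_map__attach_struct_ops() above, assuming a
 * skeleton with a hypothetical struct_ops map named "my_ops":
 *
 *   struct bpf_link *link;
 *
 *   link = bpf_map__attach_struct_ops(skel->maps.my_ops);
 */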
11783
11784 /*
11785  * Swap the struct_ops map backing a link with a new struct_ops map.
11786  */
11787 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
11788 {
11789         struct bpf_link_struct_ops *st_ops_link;
11790         __u32 zero = 0;
11791         int err;
11792
11793         if (!bpf_map__is_struct_ops(map) || map->fd < 0)
11794                 return -EINVAL;
11795
11796         st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
11797         /* Ensure the type of a link is correct */
11798         if (st_ops_link->map_fd < 0)
11799                 return -EINVAL;
11800
11801         err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
11802         /* The update can fail with -EBUSY if the map has already been
11803          * used to create or update a link.  We don't allow updating the
11804          * value of a struct_ops once it is set, which ensures that the
11805          * value never changes.  So it is safe to ignore -EBUSY here.
11806          */
11807         if (err && err != -EBUSY)
11808                 return err;
11809
11810         err = bpf_link_update(link->fd, map->fd, NULL);
11811         if (err < 0)
11812                 return err;
11813
11814         st_ops_link->map_fd = map->fd;
11815
11816         return 0;
11817 }
11818
11819 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
11820                                                           void *private_data);
11821
11822 static enum bpf_perf_event_ret
11823 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
11824                        void **copy_mem, size_t *copy_size,
11825                        bpf_perf_event_print_t fn, void *private_data)
11826 {
11827         struct perf_event_mmap_page *header = mmap_mem;
11828         __u64 data_head = ring_buffer_read_head(header);
11829         __u64 data_tail = header->data_tail;
11830         void *base = ((__u8 *)header) + page_size;
11831         int ret = LIBBPF_PERF_EVENT_CONT;
11832         struct perf_event_header *ehdr;
11833         size_t ehdr_size;
11834
11835         while (data_head != data_tail) {
11836                 ehdr = base + (data_tail & (mmap_size - 1));
11837                 ehdr_size = ehdr->size;
11838
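                /* the record wraps past the end of the mmap'ed ring; splice
                 * its two pieces into a contiguous copy buffer before handing
                 * it to the callback
                 */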
11839                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
11840                         void *copy_start = ehdr;
11841                         size_t len_first = base + mmap_size - copy_start;
11842                         size_t len_secnd = ehdr_size - len_first;
11843
11844                         if (*copy_size < ehdr_size) {
11845                                 free(*copy_mem);
11846                                 *copy_mem = malloc(ehdr_size);
11847                                 if (!*copy_mem) {
11848                                         *copy_size = 0;
11849                                         ret = LIBBPF_PERF_EVENT_ERROR;
11850                                         break;
11851                                 }
11852                                 *copy_size = ehdr_size;
11853                         }
11854
11855                         memcpy(*copy_mem, copy_start, len_first);
11856                         memcpy(*copy_mem + len_first, base, len_secnd);
11857                         ehdr = *copy_mem;
11858                 }
11859
11860                 ret = fn(ehdr, private_data);
11861                 data_tail += ehdr_size;
11862                 if (ret != LIBBPF_PERF_EVENT_CONT)
11863                         break;
11864         }
11865
11866         ring_buffer_write_tail(header, data_tail);
11867         return libbpf_err(ret);
11868 }
11869
11870 struct perf_buffer;
11871
11872 struct perf_buffer_params {
11873         struct perf_event_attr *attr;
11874         /* if event_cb is specified, it takes precedence */
11875         perf_buffer_event_fn event_cb;
11876         /* sample_cb and lost_cb are higher-level common-case callbacks */
11877         perf_buffer_sample_fn sample_cb;
11878         perf_buffer_lost_fn lost_cb;
11879         void *ctx;
11880         int cpu_cnt;
11881         int *cpus;
11882         int *map_keys;
11883 };
11884
11885 struct perf_cpu_buf {
11886         struct perf_buffer *pb;
11887         void *base; /* mmap()'ed memory */
11888         void *buf; /* for reconstructing segmented data */
11889         size_t buf_size;
11890         int fd;
11891         int cpu;
11892         int map_key;
11893 };
11894
11895 struct perf_buffer {
11896         perf_buffer_event_fn event_cb;
11897         perf_buffer_sample_fn sample_cb;
11898         perf_buffer_lost_fn lost_cb;
11899         void *ctx; /* passed into callbacks */
11900
11901         size_t page_size;
11902         size_t mmap_size;
11903         struct perf_cpu_buf **cpu_bufs;
11904         struct epoll_event *events;
11905         int cpu_cnt; /* number of allocated CPU buffers */
11906         int epoll_fd; /* epoll FD used to poll all per-CPU ring buffers */
11907         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
11908 };
11909
11910 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
11911                                       struct perf_cpu_buf *cpu_buf)
11912 {
11913         if (!cpu_buf)
11914                 return;
11915         if (cpu_buf->base &&
11916             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
11917                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
11918         if (cpu_buf->fd >= 0) {
11919                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
11920                 close(cpu_buf->fd);
11921         }
11922         free(cpu_buf->buf);
11923         free(cpu_buf);
11924 }
11925
11926 void perf_buffer__free(struct perf_buffer *pb)
11927 {
11928         int i;
11929
11930         if (IS_ERR_OR_NULL(pb))
11931                 return;
11932         if (pb->cpu_bufs) {
11933                 for (i = 0; i < pb->cpu_cnt; i++) {
11934                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
11935
11936                         if (!cpu_buf)
11937                                 continue;
11938
11939                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
11940                         perf_buffer__free_cpu_buf(pb, cpu_buf);
11941                 }
11942                 free(pb->cpu_bufs);
11943         }
11944         if (pb->epoll_fd >= 0)
11945                 close(pb->epoll_fd);
11946         free(pb->events);
11947         free(pb);
11948 }
11949
11950 static struct perf_cpu_buf *
11951 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
11952                           int cpu, int map_key)
11953 {
11954         struct perf_cpu_buf *cpu_buf;
11955         char msg[STRERR_BUFSIZE];
11956         int err;
11957
11958         cpu_buf = calloc(1, sizeof(*cpu_buf));
11959         if (!cpu_buf)
11960                 return ERR_PTR(-ENOMEM);
11961
11962         cpu_buf->pb = pb;
11963         cpu_buf->cpu = cpu;
11964         cpu_buf->map_key = map_key;
11965
11966         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
11967                               -1, PERF_FLAG_FD_CLOEXEC);
11968         if (cpu_buf->fd < 0) {
11969                 err = -errno;
11970                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
11971                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11972                 goto error;
11973         }
11974
11975         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
11976                              PROT_READ | PROT_WRITE, MAP_SHARED,
11977                              cpu_buf->fd, 0);
11978         if (cpu_buf->base == MAP_FAILED) {
11979                 cpu_buf->base = NULL;
11980                 err = -errno;
11981                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
11982                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11983                 goto error;
11984         }
11985
11986         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
11987                 err = -errno;
11988                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
11989                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
11990                 goto error;
11991         }
11992
11993         return cpu_buf;
11994
11995 error:
11996         perf_buffer__free_cpu_buf(pb, cpu_buf);
11997         return (struct perf_cpu_buf *)ERR_PTR(err);
11998 }
11999
12000 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12001                                               struct perf_buffer_params *p);
12002
12003 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
12004                                      perf_buffer_sample_fn sample_cb,
12005                                      perf_buffer_lost_fn lost_cb,
12006                                      void *ctx,
12007                                      const struct perf_buffer_opts *opts)
12008 {
12009         const size_t attr_sz = sizeof(struct perf_event_attr);
12010         struct perf_buffer_params p = {};
12011         struct perf_event_attr attr;
12012         __u32 sample_period;
12013
12014         if (!OPTS_VALID(opts, perf_buffer_opts))
12015                 return libbpf_err_ptr(-EINVAL);
12016
12017         sample_period = OPTS_GET(opts, sample_period, 1);
12018         if (!sample_period)
12019                 sample_period = 1;
12020
12021         memset(&attr, 0, attr_sz);
12022         attr.size = attr_sz;
12023         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
12024         attr.type = PERF_TYPE_SOFTWARE;
12025         attr.sample_type = PERF_SAMPLE_RAW;
12026         attr.sample_period = sample_period;
12027         attr.wakeup_events = sample_period;
12028
12029         p.attr = &attr;
12030         p.sample_cb = sample_cb;
12031         p.lost_cb = lost_cb;
12032         p.ctx = ctx;
12033
12034         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12035 }
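/* Illustrative usage sketch of perf_buffer__new() above; "map" is assumed to
 * be a BPF_MAP_TYPE_PERF_EVENT_ARRAY map and "handle_event"/"handle_lost"
 * are hypothetical callbacks; 8 is the per-CPU ring size in pages:
 *
 *   struct perf_buffer *pb;
 *
 *   pb = perf_buffer__new(bpf_map__fd(map), 8, handle_event, handle_lost,
 *                         NULL, NULL);
 *   while (perf_buffer__poll(pb, 100) >= 0)
 *           ;
 *   perf_buffer__free(pb);
 */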
12036
12037 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
12038                                          struct perf_event_attr *attr,
12039                                          perf_buffer_event_fn event_cb, void *ctx,
12040                                          const struct perf_buffer_raw_opts *opts)
12041 {
12042         struct perf_buffer_params p = {};
12043
12044         if (!attr)
12045                 return libbpf_err_ptr(-EINVAL);
12046
12047         if (!OPTS_VALID(opts, perf_buffer_raw_opts))
12048                 return libbpf_err_ptr(-EINVAL);
12049
12050         p.attr = attr;
12051         p.event_cb = event_cb;
12052         p.ctx = ctx;
12053         p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
12054         p.cpus = OPTS_GET(opts, cpus, NULL);
12055         p.map_keys = OPTS_GET(opts, map_keys, NULL);
12056
12057         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12058 }
12059
12060 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12061                                               struct perf_buffer_params *p)
12062 {
12063         const char *online_cpus_file = "/sys/devices/system/cpu/online";
12064         struct bpf_map_info map;
12065         char msg[STRERR_BUFSIZE];
12066         struct perf_buffer *pb;
12067         bool *online = NULL;
12068         __u32 map_info_len;
12069         int err, i, j, n;
12070
12071         if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
12072                 pr_warn("page count should be power of two, but is %zu\n",
12073                         page_cnt);
12074                 return ERR_PTR(-EINVAL);
12075         }
12076
12077         /* best-effort sanity checks */
12078         memset(&map, 0, sizeof(map));
12079         map_info_len = sizeof(map);
12080         err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
12081         if (err) {
12082                 err = -errno;
12083                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
12084                  * -EBADFD, -EFAULT, or -E2BIG on a real error
12085                  */
12086                 if (err != -EINVAL) {
12087                         pr_warn("failed to get map info for map FD %d: %s\n",
12088                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
12089                         return ERR_PTR(err);
12090                 }
12091                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
12092                          map_fd);
12093         } else {
12094                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
12095                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
12096                                 map.name);
12097                         return ERR_PTR(-EINVAL);
12098                 }
12099         }
12100
12101         pb = calloc(1, sizeof(*pb));
12102         if (!pb)
12103                 return ERR_PTR(-ENOMEM);
12104
12105         pb->event_cb = p->event_cb;
12106         pb->sample_cb = p->sample_cb;
12107         pb->lost_cb = p->lost_cb;
12108         pb->ctx = p->ctx;
12109
12110         pb->page_size = getpagesize();
12111         pb->mmap_size = pb->page_size * page_cnt;
12112         pb->map_fd = map_fd;
12113
12114         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
12115         if (pb->epoll_fd < 0) {
12116                 err = -errno;
12117                 pr_warn("failed to create epoll instance: %s\n",
12118                         libbpf_strerror_r(err, msg, sizeof(msg)));
12119                 goto error;
12120         }
12121
12122         if (p->cpu_cnt > 0) {
12123                 pb->cpu_cnt = p->cpu_cnt;
12124         } else {
12125                 pb->cpu_cnt = libbpf_num_possible_cpus();
12126                 if (pb->cpu_cnt < 0) {
12127                         err = pb->cpu_cnt;
12128                         goto error;
12129                 }
12130                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
12131                         pb->cpu_cnt = map.max_entries;
12132         }
12133
12134         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
12135         if (!pb->events) {
12136                 err = -ENOMEM;
12137                 pr_warn("failed to allocate events: out of memory\n");
12138                 goto error;
12139         }
12140         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
12141         if (!pb->cpu_bufs) {
12142                 err = -ENOMEM;
12143                 pr_warn("failed to allocate buffers: out of memory\n");
12144                 goto error;
12145         }
12146
12147         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
12148         if (err) {
12149                 pr_warn("failed to get online CPU mask: %d\n", err);
12150                 goto error;
12151         }
12152
12153         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
12154                 struct perf_cpu_buf *cpu_buf;
12155                 int cpu, map_key;
12156
12157                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
12158                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
12159
12160                 /* if the user didn't explicitly request particular CPUs to
12161                  * be attached to, skip offline/not-present CPUs
12162                  */
12163                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
12164                         continue;
12165
12166                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
12167                 if (IS_ERR(cpu_buf)) {
12168                         err = PTR_ERR(cpu_buf);
12169                         goto error;
12170                 }
12171
12172                 pb->cpu_bufs[j] = cpu_buf;
12173
12174                 err = bpf_map_update_elem(pb->map_fd, &map_key,
12175                                           &cpu_buf->fd, 0);
12176                 if (err) {
12177                         err = -errno;
12178                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
12179                                 cpu, map_key, cpu_buf->fd,
12180                                 libbpf_strerror_r(err, msg, sizeof(msg)));
12181                         goto error;
12182                 }
12183
12184                 pb->events[j].events = EPOLLIN;
12185                 pb->events[j].data.ptr = cpu_buf;
12186                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
12187                               &pb->events[j]) < 0) {
12188                         err = -errno;
12189                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
12190                                 cpu, cpu_buf->fd,
12191                                 libbpf_strerror_r(err, msg, sizeof(msg)));
12192                         goto error;
12193                 }
12194                 j++;
12195         }
12196         pb->cpu_cnt = j;
12197         free(online);
12198
12199         return pb;
12200
12201 error:
12202         free(online);
12203         if (pb)
12204                 perf_buffer__free(pb);
12205         return ERR_PTR(err);
12206 }
12207
12208 struct perf_sample_raw {
12209         struct perf_event_header header;
12210         uint32_t size;
12211         char data[];
12212 };
12213
12214 struct perf_sample_lost {
12215         struct perf_event_header header;
12216         uint64_t id;
12217         uint64_t lost;
12218         uint64_t sample_id;
12219 };
12220
12221 static enum bpf_perf_event_ret
12222 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
12223 {
12224         struct perf_cpu_buf *cpu_buf = ctx;
12225         struct perf_buffer *pb = cpu_buf->pb;
12226         void *data = e;
12227
12228         /* user wants full control over parsing perf event */
12229         if (pb->event_cb)
12230                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
12231
12232         switch (e->type) {
12233         case PERF_RECORD_SAMPLE: {
12234                 struct perf_sample_raw *s = data;
12235
12236                 if (pb->sample_cb)
12237                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
12238                 break;
12239         }
12240         case PERF_RECORD_LOST: {
12241                 struct perf_sample_lost *s = data;
12242
12243                 if (pb->lost_cb)
12244                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
12245                 break;
12246         }
12247         default:
12248                 pr_warn("unknown perf sample type %d\n", e->type);
12249                 return LIBBPF_PERF_EVENT_ERROR;
12250         }
12251         return LIBBPF_PERF_EVENT_CONT;
12252 }
12253
12254 static int perf_buffer__process_records(struct perf_buffer *pb,
12255                                         struct perf_cpu_buf *cpu_buf)
12256 {
12257         enum bpf_perf_event_ret ret;
12258
12259         ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
12260                                      pb->page_size, &cpu_buf->buf,
12261                                      &cpu_buf->buf_size,
12262                                      perf_buffer__process_record, cpu_buf);
12263         if (ret != LIBBPF_PERF_EVENT_CONT)
12264                 return ret;
12265         return 0;
12266 }
12267
int perf_buffer__epoll_fd(const struct perf_buffer *pb)
{
	return pb->epoll_fd;
}

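/* Usage sketch (illustrative, not part of libbpf): the epoll FD returned
 * above lets callers plug perf buffer readiness into their own event loop
 * instead of blocking in perf_buffer__poll(). "app_epoll_fd" and "pb" are
 * assumed to exist in the surrounding application code:
 *
 *	struct epoll_event ev = {
 *		.events = EPOLLIN,
 *		.data.ptr = pb,
 *	};
 *
 *	if (epoll_ctl(app_epoll_fd, EPOLL_CTL_ADD,
 *		      perf_buffer__epoll_fd(pb), &ev) < 0)
 *		return -errno;
 *	// ...later, when the FD becomes readable, drain without blocking:
 *	err = perf_buffer__consume(pb);
 */
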
int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
{
	int i, cnt, err;

	cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
	if (cnt < 0)
		return -errno;

	for (i = 0; i < cnt; i++) {
		struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;

		err = perf_buffer__process_records(pb, cpu_buf);
		if (err) {
			pr_warn("error while processing records: %d\n", err);
			return libbpf_err(err);
		}
	}
	return cnt;
}

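/* Usage sketch (illustrative, not part of libbpf): a typical consumer sets
 * up a perf_buffer over a BPF_MAP_TYPE_PERF_EVENT_ARRAY map and polls it in
 * a loop. "handle_event", "handle_lost" and "map_fd" are hypothetical names;
 * 8 is page_cnt (pages per per-CPU ring) and 100 is the poll timeout in ms:
 *
 *	static void handle_event(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		// one PERF_RECORD_SAMPLE payload, as submitted by
 *		// bpf_perf_event_output() on the BPF side
 *	}
 *
 *	static void handle_lost(void *ctx, int cpu, __u64 cnt)
 *	{
 *		// cnt samples were dropped on this CPU
 *	}
 *
 *	struct perf_buffer *pb;
 *	int err;
 *
 *	pb = perf_buffer__new(map_fd, 8, handle_event, handle_lost, NULL, NULL);
 *	if (!pb)
 *		return -errno; // perf_buffer__new() sets errno on failure
 *	while ((err = perf_buffer__poll(pb, 100)) >= 0)
 *		; // err is the number of ready buffers processed
 *	perf_buffer__free(pb);
 */
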
/* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
 * manager.
 */
size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
{
	return pb->cpu_cnt;
}

/*
 * Return perf_event FD of a ring buffer in *buf_idx* slot of
 * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
 * select()/poll()/epoll() Linux syscalls.
 */
int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	return cpu_buf->fd;
}

int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	*buf = cpu_buf->base;
	*buf_size = pb->mmap_size;
	return 0;
}

/*
 * Consume data from perf ring buffer corresponding to slot *buf_idx* in
 * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
 * consume, do nothing and return success.
 * Returns:
 *   - 0 on success;
 *   - <0 on failure.
 */
int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
{
	struct perf_cpu_buf *cpu_buf;

	if (buf_idx >= pb->cpu_cnt)
		return libbpf_err(-EINVAL);

	cpu_buf = pb->cpu_bufs[buf_idx];
	if (!cpu_buf)
		return libbpf_err(-ENOENT);

	return perf_buffer__process_records(pb, cpu_buf);
}

int perf_buffer__consume(struct perf_buffer *pb)
{
	int i, err;

	for (i = 0; i < pb->cpu_cnt; i++) {
		struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];

		if (!cpu_buf)
			continue;

		err = perf_buffer__process_records(pb, cpu_buf);
		if (err) {
			pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
			return libbpf_err(err);
		}
	}
	return 0;
}

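/* Usage sketch (illustrative, not part of libbpf): unlike
 * perf_buffer__poll(), consuming never blocks, which makes it a natural
 * final drain before tearing a buffer down; "exiting" is a hypothetical
 * flag set, e.g., from a signal handler:
 *
 *	while (!exiting) {
 *		err = perf_buffer__poll(pb, 100);
 *		if (err < 0 && err != -EINTR)
 *			break;
 *	}
 *	perf_buffer__consume(pb); // pick up any leftover samples
 *	perf_buffer__free(pb);
 */
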
int bpf_program__set_attach_target(struct bpf_program *prog,
				   int attach_prog_fd,
				   const char *attach_func_name)
{
	int btf_obj_fd = 0, btf_id = 0, err;

	if (!prog || attach_prog_fd < 0)
		return libbpf_err(-EINVAL);

	if (prog->obj->loaded)
		return libbpf_err(-EINVAL);

	if (attach_prog_fd && !attach_func_name) {
		/* remember attach_prog_fd and let bpf_program__load() find
		 * BTF ID during the program load
		 */
		prog->attach_prog_fd = attach_prog_fd;
		return 0;
	}

	if (attach_prog_fd) {
		btf_id = libbpf_find_prog_btf_id(attach_func_name,
						 attach_prog_fd);
		if (btf_id < 0)
			return libbpf_err(btf_id);
	} else {
		if (!attach_func_name)
			return libbpf_err(-EINVAL);

		/* load btf_vmlinux, if it's not loaded yet */
		err = bpf_object__load_vmlinux_btf(prog->obj, true);
		if (err)
			return libbpf_err(err);
		err = find_kernel_btf_id(prog->obj, attach_func_name,
					 prog->expected_attach_type,
					 &btf_obj_fd, &btf_id);
		if (err)
			return libbpf_err(err);
	}

	prog->attach_btf_id = btf_id;
	prog->attach_btf_obj_fd = btf_obj_fd;
	prog->attach_prog_fd = attach_prog_fd;
	return 0;
}

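/* Usage sketch (illustrative, not part of libbpf): the attach target can be
 * picked at runtime, after bpf_object__open() but before load. Passing
 * attach_prog_fd == 0 plus a function name targets a kernel function;
 * passing the FD of an already loaded BPF program targets that program
 * (e.g., for freplace). "prog" and "target_prog_fd" are hypothetical:
 *
 *	// fentry/fexit on a kernel function resolved via vmlinux BTF:
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *
 *	// freplace of a subprogram inside another BPF program:
 *	err = bpf_program__set_attach_target(prog, target_prog_fd,
 *					     "xdp_subprog");
 */
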
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
	int err = 0, n, len, start, end = -1;
	bool *tmp;

	*mask = NULL;
	*mask_sz = 0;

	/* Each substring separated by ',' has format \d+-\d+ or \d+ */
	while (*s) {
		if (*s == ',' || *s == '\n') {
			s++;
			continue;
		}
		n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
		if (n <= 0 || n > 2) {
			pr_warn("Failed to get CPU range %s: %d\n", s, n);
			err = -EINVAL;
			goto cleanup;
		} else if (n == 1) {
			end = start;
		}
		if (start < 0 || start > end) {
			pr_warn("Invalid CPU range [%d,%d] in %s\n",
				start, end, s);
			err = -EINVAL;
			goto cleanup;
		}
		tmp = realloc(*mask, end + 1);
		if (!tmp) {
			err = -ENOMEM;
			goto cleanup;
		}
		*mask = tmp;
		memset(tmp + *mask_sz, 0, start - *mask_sz);
		memset(tmp + start, 1, end - start + 1);
		*mask_sz = end + 1;
		s += len;
	}
	if (!*mask_sz) {
		pr_warn("Empty CPU range\n");
		return -EINVAL;
	}
	return 0;
cleanup:
	free(*mask);
	*mask = NULL;
	return err;
}

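/* Worked example (an illustration, not from the source): for input "0-3,5",
 * the format used by files like /sys/devices/system/cpu/possible, the loop
 * above produces a 6-entry mask with CPU 4 cleared:
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-3,5", &mask, &n);
 *	if (!err) {
 *		// n == 6; mask = {true, true, true, true, false, true}
 *		free(mask);
 *	}
 */
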
int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
{
	int fd, err = 0, len;
	char buf[128];

	fd = open(fcpu, O_RDONLY | O_CLOEXEC);
	if (fd < 0) {
		err = -errno;
		pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
		return err;
	}
	len = read(fd, buf, sizeof(buf));
	close(fd);
	if (len <= 0) {
		err = len ? -errno : -EINVAL;
		pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
		return err;
	}
	if (len >= sizeof(buf)) {
		pr_warn("CPU mask is too big in file %s\n", fcpu);
		return -E2BIG;
	}
	buf[len] = '\0';

	return parse_cpu_mask_str(buf, mask, mask_sz);
}

int libbpf_num_possible_cpus(void)
{
	static const char *fcpu = "/sys/devices/system/cpu/possible";
	static int cpus;
	int err, n, i, tmp_cpus;
	bool *mask;

	tmp_cpus = READ_ONCE(cpus);
	if (tmp_cpus > 0)
		return tmp_cpus;

	err = parse_cpu_mask_file(fcpu, &mask, &n);
	if (err)
		return libbpf_err(err);

	tmp_cpus = 0;
	for (i = 0; i < n; i++) {
		if (mask[i])
			tmp_cpus++;
	}
	free(mask);

	WRITE_ONCE(cpus, tmp_cpus);
	return tmp_cpus;
}

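/* Usage sketch (illustrative, not part of libbpf): the possible-CPU count
 * sizes the value array for per-CPU map lookups, where the kernel fills one
 * value slot per possible CPU; "percpu_map_fd" and "key" are hypothetical:
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	__u64 *values;
 *
 *	if (ncpus < 0)
 *		return ncpus;
 *	values = calloc(ncpus, sizeof(*values));
 *	if (!values)
 *		return -ENOMEM;
 *	err = bpf_map_lookup_elem(percpu_map_fd, &key, values);
 */
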
static int populate_skeleton_maps(const struct bpf_object *obj,
				  struct bpf_map_skeleton *maps,
				  size_t map_cnt)
{
	int i;

	for (i = 0; i < map_cnt; i++) {
		struct bpf_map **map = maps[i].map;
		const char *name = maps[i].name;
		void **mmaped = maps[i].mmaped;

		*map = bpf_object__find_map_by_name(obj, name);
		if (!*map) {
			pr_warn("failed to find skeleton map '%s'\n", name);
			return -ESRCH;
		}

		/* externs shouldn't be pre-setup from user code */
		if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
			*mmaped = (*map)->mmaped;
	}
	return 0;
}

static int populate_skeleton_progs(const struct bpf_object *obj,
				   struct bpf_prog_skeleton *progs,
				   size_t prog_cnt)
{
	int i;

	for (i = 0; i < prog_cnt; i++) {
		struct bpf_program **prog = progs[i].prog;
		const char *name = progs[i].name;

		*prog = bpf_object__find_program_by_name(obj, name);
		if (!*prog) {
			pr_warn("failed to find skeleton program '%s'\n", name);
			return -ESRCH;
		}
	}
	return 0;
}

int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
			      const struct bpf_object_open_opts *opts)
{
	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
		.object_name = s->name,
	);
	struct bpf_object *obj;
	int err;

	/* Attempt to preserve opts->object_name, unless overridden by user
	 * explicitly. Overwriting object name for skeletons is discouraged,
	 * as it breaks global data maps, because they contain object name
	 * prefix as their own map name prefix. When skeleton is generated,
	 * bpftool is making an assumption that this name will stay the same.
	 */
	if (opts) {
		memcpy(&skel_opts, opts, sizeof(*opts));
		if (!opts->object_name)
			skel_opts.object_name = s->name;
	}

	obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
	err = libbpf_get_error(obj);
	if (err) {
		pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
			s->name, err);
		return libbpf_err(err);
	}

	*s->obj = obj;
	err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	return 0;
}

int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
{
	int err, len, var_idx, i;
	const char *var_name;
	const struct bpf_map *map;
	struct btf *btf;
	__u32 map_type_id;
	const struct btf_type *map_type, *var_type;
	const struct bpf_var_skeleton *var_skel;
	struct btf_var_secinfo *var;

	if (!s->obj)
		return libbpf_err(-EINVAL);

	btf = bpf_object__btf(s->obj);
	if (!btf) {
		pr_warn("subskeletons require BTF at runtime (object %s)\n",
			bpf_object__name(s->obj));
		return libbpf_err(-errno);
	}

	err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton maps: %d\n", err);
		return libbpf_err(err);
	}

	err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
	if (err) {
		pr_warn("failed to populate subskeleton progs: %d\n", err);
		return libbpf_err(err);
	}

	for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
		var_skel = &s->vars[var_idx];
		map = *var_skel->map;
		map_type_id = bpf_map__btf_value_type_id(map);
		map_type = btf__type_by_id(btf, map_type_id);

		if (!btf_is_datasec(map_type)) {
			pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
				bpf_map__name(map),
				__btf_kind_str(btf_kind(map_type)));
			return libbpf_err(-EINVAL);
		}

		len = btf_vlen(map_type);
		var = btf_var_secinfos(map_type);
		for (i = 0; i < len; i++, var++) {
			var_type = btf__type_by_id(btf, var->type);
			var_name = btf__name_by_offset(btf, var_type->name_off);
			if (strcmp(var_name, var_skel->name) == 0) {
				*var_skel->addr = map->mmaped + var->offset;
				break;
			}
		}
	}
	return 0;
}

void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
{
	if (!s)
		return;
	free(s->maps);
	free(s->progs);
	free(s->vars);
	free(s);
}

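/* Usage sketch (illustrative; the "mysub__*" names stand in for whatever
 * "bpftool gen subskeleton" emits and are an assumption here): a
 * subskeleton is opened against an already-open bpf_object owned by other
 * code, and the loop in bpf_object__open_subskeleton() above resolves its
 * global variables by name:
 *
 *	struct mysub *sub = mysub__open(obj); // obj: existing bpf_object
 *	if (!sub)
 *		return -errno;
 *	// generated per-variable pointers now point into obj's mapped data
 *	mysub__destroy(sub);
 */
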
int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	err = bpf_object__load(*s->obj);
	if (err) {
		pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
		return libbpf_err(err);
	}

	for (i = 0; i < s->map_cnt; i++) {
		struct bpf_map *map = *s->maps[i].map;
		size_t mmap_sz = bpf_map_mmap_sz(map);
		int prot, map_fd = bpf_map__fd(map);
		void **mmaped = s->maps[i].mmaped;

		if (!mmaped)
			continue;

		if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
			*mmaped = NULL;
			continue;
		}

		if (map->def.map_flags & BPF_F_RDONLY_PROG)
			prot = PROT_READ;
		else
			prot = PROT_READ | PROT_WRITE;

		/* Remap the anonymous mmap()-ed "map initialization image"
		 * as BPF map-backed mmap()-ed memory, preserving the same
		 * memory address. This causes the kernel to change the
		 * process's page table to point to a different piece of
		 * kernel memory, but from the userspace point of view the
		 * memory address (and its contents, identical at this point)
		 * stays the same. This mapping will be released by
		 * bpf_object__close() as part of the normal clean up
		 * procedure, so we don't need to worry about it from the
		 * skeleton's clean up perspective.
		 */
		*mmaped = mmap(map->mmaped, mmap_sz, prot,
				MAP_SHARED | MAP_FIXED, map_fd, 0);
		if (*mmaped == MAP_FAILED) {
			err = -errno;
			*mmaped = NULL;
			pr_warn("failed to re-mmap() map '%s': %d\n",
				 bpf_map__name(map), err);
			return libbpf_err(err);
		}
	}

	return 0;
}

int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
{
	int i, err;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_program *prog = *s->progs[i].prog;
		struct bpf_link **link = s->progs[i].link;

		if (!prog->autoload || !prog->autoattach)
			continue;

		/* auto-attaching not supported for this program */
		if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
			continue;

		/* if user already set the link manually, don't attempt auto-attach */
		if (*link)
			continue;

		err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
		if (err) {
			pr_warn("prog '%s': failed to auto-attach: %d\n",
				bpf_program__name(prog), err);
			return libbpf_err(err);
		}

		/* For some SEC() definitions auto-attach is supported only in
		 * some cases (e.g., when the definition completely specifies
		 * the target information); SEC("uprobe") is one such case. If
		 * the user specified the target binary and function name,
		 * such a BPF program can be auto-attached. If not, it
		 * shouldn't cause the skeleton's attach to fail; it should
		 * just be skipped. attach_fn signals such a case by returning
		 * 0 (no error) and setting link to NULL.
		 */
	}

	return 0;
}

void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
{
	int i;

	for (i = 0; i < s->prog_cnt; i++) {
		struct bpf_link **link = s->progs[i].link;

		bpf_link__destroy(*link);
		*link = NULL;
	}
}

void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
{
	if (!s)
		return;

	if (s->progs)
		bpf_object__detach_skeleton(s);
	if (s->obj)
		bpf_object__close(*s->obj);
	free(s->maps);
	free(s->progs);
	free(s);
}
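
/* Usage sketch (illustrative; "myobj__*" stands in for the API emitted by
 * "bpftool gen skeleton" and is an assumption here). The generated helpers
 * wrap the functions above: bpf_object__open_skeleton(),
 * bpf_object__load_skeleton(), bpf_object__attach_skeleton() and
 * bpf_object__destroy_skeleton():
 *
 *	struct myobj *skel;
 *	int err;
 *
 *	skel = myobj__open();
 *	if (!skel)
 *		return -errno;
 *	err = myobj__load(skel);
 *	if (err)
 *		goto cleanup;
 *	err = myobj__attach(skel);
 *	if (err)
 *		goto cleanup;
 *	// ... run; skel->maps/skel->progs/global data are directly usable
 * cleanup:
 *	myobj__destroy(skel);
 *	return err;
 */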