8fc62b6b1cd6327aba10b1c29f6e6e02486ec994
[platform/kernel/linux-rpi.git] / tools / lib / bpf / libbpf.c
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2
3 /*
4  * Common eBPF ELF object loading operations.
5  *
6  * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
7  * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
8  * Copyright (C) 2015 Huawei Inc.
9  * Copyright (C) 2017 Nicira, Inc.
10  * Copyright (C) 2019 Isovalent, Inc.
11  */
12
13 #ifndef _GNU_SOURCE
14 #define _GNU_SOURCE
15 #endif
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <stdarg.h>
19 #include <libgen.h>
20 #include <inttypes.h>
21 #include <string.h>
22 #include <unistd.h>
23 #include <fcntl.h>
24 #include <errno.h>
25 #include <asm/unistd.h>
26 #include <linux/err.h>
27 #include <linux/kernel.h>
28 #include <linux/bpf.h>
29 #include <linux/btf.h>
30 #include <linux/filter.h>
31 #include <linux/list.h>
32 #include <linux/limits.h>
33 #include <linux/perf_event.h>
34 #include <linux/ring_buffer.h>
35 #include <sys/epoll.h>
36 #include <sys/ioctl.h>
37 #include <sys/mman.h>
38 #include <sys/stat.h>
39 #include <sys/types.h>
40 #include <sys/vfs.h>
41 #include <tools/libc_compat.h>
42 #include <libelf.h>
43 #include <gelf.h>
44
45 #include "libbpf.h"
46 #include "bpf.h"
47 #include "btf.h"
48 #include "str_error.h"
49 #include "libbpf_internal.h"
50
51 #ifndef EM_BPF
52 #define EM_BPF 247
53 #endif
54
55 #ifndef BPF_FS_MAGIC
56 #define BPF_FS_MAGIC            0xcafe4a11
57 #endif
58
/* vfprintf() in __base_pr() uses a nonliteral format string. It may break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
62 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
63
64 #define __printf(a, b)  __attribute__((format(printf, a, b)))
65
66 static int __base_pr(enum libbpf_print_level level, const char *format,
67                      va_list args)
68 {
69         if (level == LIBBPF_DEBUG)
70                 return 0;
71
72         return vfprintf(stderr, format, args);
73 }
74
75 static libbpf_print_fn_t __libbpf_pr = __base_pr;
76
77 libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
78 {
79         libbpf_print_fn_t old_print_fn = __libbpf_pr;
80
81         __libbpf_pr = fn;
82         return old_print_fn;
83 }
84
85 __printf(2, 3)
86 void libbpf_print(enum libbpf_print_level level, const char *format, ...)
87 {
88         va_list args;
89
90         if (!__libbpf_pr)
91                 return;
92
93         va_start(args, format);
94         __libbpf_pr(level, format, args);
95         va_end(args);
96 }
97
98 #define STRERR_BUFSIZE  128
99
/*
 * Evaluate action, store its result in err, and jump to the label out
 * when that result is non-zero.
 */
#define CHECK_ERR(action, err, out) do {        \
        err = action;                   \
        if (err)                        \
                goto out;               \
} while(0)
105
106
/* Copied from tools/perf/util/util.h */
/*
 * zfree(ptr): free the memory *ptr points to and reset *ptr to NULL.
 * ptr is the address of the pointer variable. The argument is
 * parenthesized so that any pointer-valued expression (for example
 * "cond ? &a : &b") is dereferenced as a whole; it is evaluated twice,
 * so it must not have side effects.
 */
#ifndef zfree
# define zfree(ptr) ({ free(*(ptr)); *(ptr) = NULL; })
#endif
111
/*
 * zclose(fd): close fd when it is non-negative, then set fd to -1.
 * Evaluates to the close() result, or 0 when nothing was closed.
 * fd must be an lvalue.
 */
#ifndef zclose
# define zclose(fd) ({                                  \
        int ___err = (fd) >= 0 ? close((fd)) : 0;       \
        fd = -1;                                        \
        ___err; })
#endif
120
121 #ifdef HAVE_LIBELF_MMAP_SUPPORT
122 # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
123 #else
124 # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
125 #endif
126
/* Convert a pointer to a __u64 by way of unsigned long. */
static inline __u64 ptr_to_u64(const void *ptr)
{
        unsigned long addr = (unsigned long) ptr;

        return (__u64) addr;
}
131
/*
 * Kernel feature flags, one bit per feature. Each bpf_object embeds one
 * instance and every program of that object keeps a pointer to it.
 */
struct bpf_capabilities {
        /* v4.14: kernel support for program & map names. */
        __u32 name:1;
        /* v5.2: kernel support for global data sections. */
        __u32 global_data:1;
        /* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */
        __u32 btf_func:1;
        /* BTF_KIND_VAR and BTF_KIND_DATASEC support */
        __u32 btf_datasec:1;
};
142
/*
 * bpf_prog should be a better name but it has been used in
 * linux/filter.h.
 *
 * One BPF program, built from a single ELF section by
 * bpf_program__init() and torn down by bpf_program__exit().
 */
struct bpf_program {
        /* Index in elf obj file, for relocation use. */
        int idx;
        /* Heap copy of the global symbol name found in section idx
         * (".text" for the text section without such a symbol).
         */
        char *name;
        int prog_ifindex;
        /* Heap copy of the name of the ELF section the program came from */
        char *section_name;
        /* section_name with / replaced by _; makes recursive pinning
         * in bpf_object__pin_programs easier
         */
        char *pin_name;
        /* Heap copy of the section contents */
        struct bpf_insn *insns;
        /* insns_cnt: number of struct bpf_insn entries in insns */
        size_t insns_cnt, main_prog_cnt;
        enum bpf_prog_type type;

        /* Relocation records; nr_reloc is presumably the entry count
         * of this array - TODO confirm against the code filling it.
         */
        struct reloc_desc {
                enum {
                        RELO_LD64,
                        RELO_CALL,
                        RELO_DATA,
                } type;
                int insn_idx;
                union {
                        int map_idx;
                        int text_off;
                };
        } *reloc_desc;
        int nr_reloc;
        int log_level;

        /* File descriptors of the loaded instances. nr is -1 while the
         * program has never been loaded; fds is freed on unload.
         */
        struct {
                int nr;
                int *fds;
        } instances;
        bpf_program_prep_t preprocessor;

        /* Object this program was added to */
        struct bpf_object *obj;
        /* User private data; clear_priv(prog, priv) is invoked on exit */
        void *priv;
        bpf_program_clear_priv_t clear_priv;

        enum bpf_attach_type expected_attach_type;
        /* -1 when no BTF fd is held; closed on unload */
        int btf_fd;
        /* Freed on unload */
        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        /* Points at the owning object's capability flags */
        struct bpf_capabilities *caps;

        /* Freed on unload */
        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};
199
/*
 * Origin of a bpf_map. Maps defined in the object's maps section are
 * LIBBPF_MAP_UNSPEC; the other values mark the internal maps libbpf
 * creates for the .data, .bss and .rodata sections.
 */
enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
};
206
/*
 * Section name for each internal map type, indexed by
 * enum libbpf_map_type. Used as the suffix of internal map names.
 * There is no entry for LIBBPF_MAP_UNSPEC.
 */
static const char * const libbpf_type_to_btf_name[] = {
        [LIBBPF_MAP_DATA]       = ".data",
        [LIBBPF_MAP_BSS]        = ".bss",
        [LIBBPF_MAP_RODATA]     = ".rodata",
};
212
/* One map of a bpf_object; slots are handed out by bpf_object__add_map(). */
struct bpf_map {
        /* -1 in a freshly added slot */
        int fd;
        /* Heap-allocated map name */
        char *name;
        /* ELF section index the map was defined in */
        int sec_idx;
        /* Offset of the definition inside that section (0 for the
         * internal global data maps)
         */
        size_t sec_offset;
        int map_ifindex;
        /* -1 in a freshly added slot */
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        void *priv;
        bpf_map_clear_priv_t clear_priv;
        /* LIBBPF_MAP_UNSPEC for user maps, otherwise the internal kind */
        enum libbpf_map_type libbpf_type;
};
227
/*
 * Heap copies of the .rodata and .data section contents, made when the
 * corresponding internal maps are initialized.
 */
struct bpf_secdata {
        void *rodata;
        void *data;
};
232
/* Every object created by bpf_object__new() is linked into this list. */
static LIST_HEAD(bpf_objects_list);

/* State of one BPF ELF object: its programs, maps and ELF parsing data. */
struct bpf_object {
        /* basename of path, cut at the first '.' */
        char name[BPF_OBJ_NAME_LEN];
        /* Copy of the license data, at most sizeof(license) - 1 bytes */
        char license[64];
        /* Value read from the 4-byte kernel version data */
        __u32 kern_version;

        /* Array of nr_programs programs, grown one entry at a time */
        struct bpf_program *programs;
        size_t nr_programs;
        /* Array with room for maps_cap maps, nr_maps of them in use */
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;
        struct bpf_secdata sections;

        /* false on creation */
        bool loaded;
        bool has_pseudo_calls;

        /*
         * Information when doing elf related work. Only valid if fd
         * is valid.
         */
        struct {
                /* -1 while no file is open */
                int fd;
                /* Caller-provided in-memory image; not owned by libbpf */
                void *obj_buf;
                size_t obj_buf_sz;
                /* libelf handle; NULL when no ELF parsing is in progress */
                Elf *elf;
                GElf_Ehdr ehdr;
                Elf_Data *symbols;
                Elf_Data *data;
                Elf_Data *rodata;
                Elf_Data *bss;
                /* Section index of the string table used for symbol names */
                size_t strtabidx;
                struct {
                        GElf_Shdr shdr;
                        Elf_Data *data;
                } *reloc;
                int nr_reloc;
                /* Section indices; maps, btf_maps, data, rodata and bss
                 * start out as -1 (not found).
                 */
                int maps_shndx;
                int btf_maps_shndx;
                int text_shndx;
                int data_shndx;
                int rodata_shndx;
                int bss_shndx;
        } efile;
        /*
         * Every bpf_object is linked into a list that is hidden from
         * the caller. The bpf_objects__<func> handlers deal with all
         * objects.
         */
        struct list_head list;

        struct btf *btf;
        struct btf_ext *btf_ext;

        void *priv;
        bpf_object_clear_priv_t clear_priv;

        struct bpf_capabilities caps;

        /* Copy of the path given to bpf_object__new(), NUL-terminated */
        char path[];
};
/* True while the object holds a libelf handle */
#define obj_elf_valid(o)        ((o)->efile.elf)
295
296 void bpf_program__unload(struct bpf_program *prog)
297 {
298         int i;
299
300         if (!prog)
301                 return;
302
303         /*
304          * If the object is opened but the program was never loaded,
305          * it is possible that prog->instances.nr == -1.
306          */
307         if (prog->instances.nr > 0) {
308                 for (i = 0; i < prog->instances.nr; i++)
309                         zclose(prog->instances.fds[i]);
310         } else if (prog->instances.nr != -1) {
311                 pr_warning("Internal error: instances.nr is %d\n",
312                            prog->instances.nr);
313         }
314
315         prog->instances.nr = -1;
316         zfree(&prog->instances.fds);
317
318         zclose(prog->btf_fd);
319         zfree(&prog->func_info);
320         zfree(&prog->line_info);
321 }
322
323 static void bpf_program__exit(struct bpf_program *prog)
324 {
325         if (!prog)
326                 return;
327
328         if (prog->clear_priv)
329                 prog->clear_priv(prog, prog->priv);
330
331         prog->priv = NULL;
332         prog->clear_priv = NULL;
333
334         bpf_program__unload(prog);
335         zfree(&prog->name);
336         zfree(&prog->section_name);
337         zfree(&prog->pin_name);
338         zfree(&prog->insns);
339         zfree(&prog->reloc_desc);
340
341         prog->nr_reloc = 0;
342         prog->insns_cnt = 0;
343         prog->idx = -1;
344 }
345
346 static char *__bpf_program__pin_name(struct bpf_program *prog)
347 {
348         char *name, *p;
349
350         name = p = strdup(prog->section_name);
351         while ((p = strchr(p, '/')))
352                 *p = '_';
353
354         return name;
355 }
356
357 static int
358 bpf_program__init(void *data, size_t size, char *section_name, int idx,
359                   struct bpf_program *prog)
360 {
361         const size_t bpf_insn_sz = sizeof(struct bpf_insn);
362
363         if (size == 0 || size % bpf_insn_sz) {
364                 pr_warning("corrupted section '%s', size: %zu\n",
365                            section_name, size);
366                 return -EINVAL;
367         }
368
369         memset(prog, 0, sizeof(*prog));
370
371         prog->section_name = strdup(section_name);
372         if (!prog->section_name) {
373                 pr_warning("failed to alloc name for prog under section(%d) %s\n",
374                            idx, section_name);
375                 goto errout;
376         }
377
378         prog->pin_name = __bpf_program__pin_name(prog);
379         if (!prog->pin_name) {
380                 pr_warning("failed to alloc pin name for prog under section(%d) %s\n",
381                            idx, section_name);
382                 goto errout;
383         }
384
385         prog->insns = malloc(size);
386         if (!prog->insns) {
387                 pr_warning("failed to alloc insns for prog under section %s\n",
388                            section_name);
389                 goto errout;
390         }
391         prog->insns_cnt = size / bpf_insn_sz;
392         memcpy(prog->insns, data, size);
393         prog->idx = idx;
394         prog->instances.fds = NULL;
395         prog->instances.nr = -1;
396         prog->type = BPF_PROG_TYPE_UNSPEC;
397         prog->btf_fd = -1;
398
399         return 0;
400 errout:
401         bpf_program__exit(prog);
402         return -ENOMEM;
403 }
404
405 static int
406 bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
407                         char *section_name, int idx)
408 {
409         struct bpf_program prog, *progs;
410         int nr_progs, err;
411
412         err = bpf_program__init(data, size, section_name, idx, &prog);
413         if (err)
414                 return err;
415
416         prog.caps = &obj->caps;
417         progs = obj->programs;
418         nr_progs = obj->nr_programs;
419
420         progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
421         if (!progs) {
422                 /*
423                  * In this case the original obj->programs
424                  * is still valid, so don't need special treat for
425                  * bpf_close_object().
426                  */
427                 pr_warning("failed to alloc a new program under section '%s'\n",
428                            section_name);
429                 bpf_program__exit(&prog);
430                 return -ENOMEM;
431         }
432
433         pr_debug("found program %s\n", prog.section_name);
434         obj->programs = progs;
435         obj->nr_programs = nr_progs + 1;
436         prog.obj = obj;
437         progs[nr_progs] = prog;
438         return 0;
439 }
440
441 static int
442 bpf_object__init_prog_names(struct bpf_object *obj)
443 {
444         Elf_Data *symbols = obj->efile.symbols;
445         struct bpf_program *prog;
446         size_t pi, si;
447
448         for (pi = 0; pi < obj->nr_programs; pi++) {
449                 const char *name = NULL;
450
451                 prog = &obj->programs[pi];
452
453                 for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
454                      si++) {
455                         GElf_Sym sym;
456
457                         if (!gelf_getsym(symbols, si, &sym))
458                                 continue;
459                         if (sym.st_shndx != prog->idx)
460                                 continue;
461                         if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
462                                 continue;
463
464                         name = elf_strptr(obj->efile.elf,
465                                           obj->efile.strtabidx,
466                                           sym.st_name);
467                         if (!name) {
468                                 pr_warning("failed to get sym name string for prog %s\n",
469                                            prog->section_name);
470                                 return -LIBBPF_ERRNO__LIBELF;
471                         }
472                 }
473
474                 if (!name && prog->idx == obj->efile.text_shndx)
475                         name = ".text";
476
477                 if (!name) {
478                         pr_warning("failed to find sym for prog %s\n",
479                                    prog->section_name);
480                         return -EINVAL;
481                 }
482
483                 prog->name = strdup(name);
484                 if (!prog->name) {
485                         pr_warning("failed to allocate memory for prog sym %s\n",
486                                    name);
487                         return -ENOMEM;
488                 }
489         }
490
491         return 0;
492 }
493
494 static struct bpf_object *bpf_object__new(const char *path,
495                                           void *obj_buf,
496                                           size_t obj_buf_sz)
497 {
498         struct bpf_object *obj;
499         char *end;
500
501         obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
502         if (!obj) {
503                 pr_warning("alloc memory failed for %s\n", path);
504                 return ERR_PTR(-ENOMEM);
505         }
506
507         strcpy(obj->path, path);
508         /* Using basename() GNU version which doesn't modify arg. */
509         strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1);
510         end = strchr(obj->name, '.');
511         if (end)
512                 *end = 0;
513
514         obj->efile.fd = -1;
515         /*
516          * Caller of this function should also call
517          * bpf_object__elf_finish() after data collection to return
518          * obj_buf to user. If not, we should duplicate the buffer to
519          * avoid user freeing them before elf finish.
520          */
521         obj->efile.obj_buf = obj_buf;
522         obj->efile.obj_buf_sz = obj_buf_sz;
523         obj->efile.maps_shndx = -1;
524         obj->efile.btf_maps_shndx = -1;
525         obj->efile.data_shndx = -1;
526         obj->efile.rodata_shndx = -1;
527         obj->efile.bss_shndx = -1;
528
529         obj->loaded = false;
530
531         INIT_LIST_HEAD(&obj->list);
532         list_add(&obj->list, &bpf_objects_list);
533         return obj;
534 }
535
536 static void bpf_object__elf_finish(struct bpf_object *obj)
537 {
538         if (!obj_elf_valid(obj))
539                 return;
540
541         if (obj->efile.elf) {
542                 elf_end(obj->efile.elf);
543                 obj->efile.elf = NULL;
544         }
545         obj->efile.symbols = NULL;
546         obj->efile.data = NULL;
547         obj->efile.rodata = NULL;
548         obj->efile.bss = NULL;
549
550         zfree(&obj->efile.reloc);
551         obj->efile.nr_reloc = 0;
552         zclose(obj->efile.fd);
553         obj->efile.obj_buf = NULL;
554         obj->efile.obj_buf_sz = 0;
555 }
556
557 static int bpf_object__elf_init(struct bpf_object *obj)
558 {
559         int err = 0;
560         GElf_Ehdr *ep;
561
562         if (obj_elf_valid(obj)) {
563                 pr_warning("elf init: internal error\n");
564                 return -LIBBPF_ERRNO__LIBELF;
565         }
566
567         if (obj->efile.obj_buf_sz > 0) {
568                 /*
569                  * obj_buf should have been validated by
570                  * bpf_object__open_buffer().
571                  */
572                 obj->efile.elf = elf_memory(obj->efile.obj_buf,
573                                             obj->efile.obj_buf_sz);
574         } else {
575                 obj->efile.fd = open(obj->path, O_RDONLY);
576                 if (obj->efile.fd < 0) {
577                         char errmsg[STRERR_BUFSIZE], *cp;
578
579                         err = -errno;
580                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
581                         pr_warning("failed to open %s: %s\n", obj->path, cp);
582                         return err;
583                 }
584
585                 obj->efile.elf = elf_begin(obj->efile.fd,
586                                            LIBBPF_ELF_C_READ_MMAP, NULL);
587         }
588
589         if (!obj->efile.elf) {
590                 pr_warning("failed to open %s as ELF file\n", obj->path);
591                 err = -LIBBPF_ERRNO__LIBELF;
592                 goto errout;
593         }
594
595         if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
596                 pr_warning("failed to get EHDR from %s\n", obj->path);
597                 err = -LIBBPF_ERRNO__FORMAT;
598                 goto errout;
599         }
600         ep = &obj->efile.ehdr;
601
602         /* Old LLVM set e_machine to EM_NONE */
603         if (ep->e_type != ET_REL ||
604             (ep->e_machine && ep->e_machine != EM_BPF)) {
605                 pr_warning("%s is not an eBPF object file\n", obj->path);
606                 err = -LIBBPF_ERRNO__FORMAT;
607                 goto errout;
608         }
609
610         return 0;
611 errout:
612         bpf_object__elf_finish(obj);
613         return err;
614 }
615
616 static int bpf_object__check_endianness(struct bpf_object *obj)
617 {
618 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
619         if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2LSB)
620                 return 0;
621 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
622         if (obj->efile.ehdr.e_ident[EI_DATA] == ELFDATA2MSB)
623                 return 0;
624 #else
625 # error "Unrecognized __BYTE_ORDER__"
626 #endif
627         pr_warning("endianness mismatch.\n");
628         return -LIBBPF_ERRNO__ENDIAN;
629 }
630
631 static int
632 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
633 {
634         memcpy(obj->license, data, min(size, sizeof(obj->license) - 1));
635         pr_debug("license of %s is %s\n", obj->path, obj->license);
636         return 0;
637 }
638
639 static int
640 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
641 {
642         __u32 kver;
643
644         if (size != sizeof(kver)) {
645                 pr_warning("invalid kver section in %s\n", obj->path);
646                 return -LIBBPF_ERRNO__FORMAT;
647         }
648         memcpy(&kver, data, sizeof(kver));
649         obj->kern_version = kver;
650         pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
651         return 0;
652 }
653
654 static int compare_bpf_map(const void *_a, const void *_b)
655 {
656         const struct bpf_map *a = _a;
657         const struct bpf_map *b = _b;
658
659         if (a->sec_idx != b->sec_idx)
660                 return a->sec_idx - b->sec_idx;
661         return a->sec_offset - b->sec_offset;
662 }
663
/* True for the two map-in-map types (array of maps, hash of maps). */
static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
{
        return type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
               type == BPF_MAP_TYPE_HASH_OF_MAPS;
}
671
672 static int bpf_object_search_section_size(const struct bpf_object *obj,
673                                           const char *name, size_t *d_size)
674 {
675         const GElf_Ehdr *ep = &obj->efile.ehdr;
676         Elf *elf = obj->efile.elf;
677         Elf_Scn *scn = NULL;
678         int idx = 0;
679
680         while ((scn = elf_nextscn(elf, scn)) != NULL) {
681                 const char *sec_name;
682                 Elf_Data *data;
683                 GElf_Shdr sh;
684
685                 idx++;
686                 if (gelf_getshdr(scn, &sh) != &sh) {
687                         pr_warning("failed to get section(%d) header from %s\n",
688                                    idx, obj->path);
689                         return -EIO;
690                 }
691
692                 sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
693                 if (!sec_name) {
694                         pr_warning("failed to get section(%d) name from %s\n",
695                                    idx, obj->path);
696                         return -EIO;
697                 }
698
699                 if (strcmp(name, sec_name))
700                         continue;
701
702                 data = elf_getdata(scn, 0);
703                 if (!data) {
704                         pr_warning("failed to get section(%d) data from %s(%s)\n",
705                                    idx, name, obj->path);
706                         return -EIO;
707                 }
708
709                 *d_size = data->d_size;
710                 return 0;
711         }
712
713         return -ENOENT;
714 }
715
716 int bpf_object__section_size(const struct bpf_object *obj, const char *name,
717                              __u32 *size)
718 {
719         int ret = -ENOENT;
720         size_t d_size;
721
722         *size = 0;
723         if (!name) {
724                 return -EINVAL;
725         } else if (!strcmp(name, ".data")) {
726                 if (obj->efile.data)
727                         *size = obj->efile.data->d_size;
728         } else if (!strcmp(name, ".bss")) {
729                 if (obj->efile.bss)
730                         *size = obj->efile.bss->d_size;
731         } else if (!strcmp(name, ".rodata")) {
732                 if (obj->efile.rodata)
733                         *size = obj->efile.rodata->d_size;
734         } else {
735                 ret = bpf_object_search_section_size(obj, name, &d_size);
736                 if (!ret)
737                         *size = d_size;
738         }
739
740         return *size ? 0 : ret;
741 }
742
743 int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,
744                                 __u32 *off)
745 {
746         Elf_Data *symbols = obj->efile.symbols;
747         const char *sname;
748         size_t si;
749
750         if (!name || !off)
751                 return -EINVAL;
752
753         for (si = 0; si < symbols->d_size / sizeof(GElf_Sym); si++) {
754                 GElf_Sym sym;
755
756                 if (!gelf_getsym(symbols, si, &sym))
757                         continue;
758                 if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL ||
759                     GELF_ST_TYPE(sym.st_info) != STT_OBJECT)
760                         continue;
761
762                 sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
763                                    sym.st_name);
764                 if (!sname) {
765                         pr_warning("failed to get sym name string for var %s\n",
766                                    name);
767                         return -EIO;
768                 }
769                 if (strcmp(name, sname) == 0) {
770                         *off = sym.st_value;
771                         return 0;
772                 }
773         }
774
775         return -ENOENT;
776 }
777
778 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
779 {
780         struct bpf_map *new_maps;
781         size_t new_cap;
782         int i;
783
784         if (obj->nr_maps < obj->maps_cap)
785                 return &obj->maps[obj->nr_maps++];
786
787         new_cap = max((size_t)4, obj->maps_cap * 3 / 2);
788         new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));
789         if (!new_maps) {
790                 pr_warning("alloc maps for object failed\n");
791                 return ERR_PTR(-ENOMEM);
792         }
793
794         obj->maps_cap = new_cap;
795         obj->maps = new_maps;
796
797         /* zero out new maps */
798         memset(obj->maps + obj->nr_maps, 0,
799                (obj->maps_cap - obj->nr_maps) * sizeof(*obj->maps));
800         /*
801          * fill all fd with -1 so won't close incorrect fd (fd=0 is stdin)
802          * when failure (zclose won't close negative fd)).
803          */
804         for (i = obj->nr_maps; i < obj->maps_cap; i++) {
805                 obj->maps[i].fd = -1;
806                 obj->maps[i].inner_map_fd = -1;
807         }
808
809         return &obj->maps[obj->nr_maps++];
810 }
811
812 static int
813 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
814                               int sec_idx, Elf_Data *data, void **data_buff)
815 {
816         char map_name[BPF_OBJ_NAME_LEN];
817         struct bpf_map_def *def;
818         struct bpf_map *map;
819
820         map = bpf_object__add_map(obj);
821         if (IS_ERR(map))
822                 return PTR_ERR(map);
823
824         map->libbpf_type = type;
825         map->sec_idx = sec_idx;
826         map->sec_offset = 0;
827         snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name,
828                  libbpf_type_to_btf_name[type]);
829         map->name = strdup(map_name);
830         if (!map->name) {
831                 pr_warning("failed to alloc map name\n");
832                 return -ENOMEM;
833         }
834         pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n",
835                  map_name, map->sec_idx, map->sec_offset);
836
837         def = &map->def;
838         def->type = BPF_MAP_TYPE_ARRAY;
839         def->key_size = sizeof(int);
840         def->value_size = data->d_size;
841         def->max_entries = 1;
842         def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0;
843         if (data_buff) {
844                 *data_buff = malloc(data->d_size);
845                 if (!*data_buff) {
846                         zfree(&map->name);
847                         pr_warning("failed to alloc map content buffer\n");
848                         return -ENOMEM;
849                 }
850                 memcpy(*data_buff, data->d_buf, data->d_size);
851         }
852
853         pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
854         return 0;
855 }
856
857 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
858 {
859         int err;
860
861         if (!obj->caps.global_data)
862                 return 0;
863         /*
864          * Populate obj->maps with libbpf internal maps.
865          */
866         if (obj->efile.data_shndx >= 0) {
867                 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
868                                                     obj->efile.data_shndx,
869                                                     obj->efile.data,
870                                                     &obj->sections.data);
871                 if (err)
872                         return err;
873         }
874         if (obj->efile.rodata_shndx >= 0) {
875                 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
876                                                     obj->efile.rodata_shndx,
877                                                     obj->efile.rodata,
878                                                     &obj->sections.rodata);
879                 if (err)
880                         return err;
881         }
882         if (obj->efile.bss_shndx >= 0) {
883                 err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
884                                                     obj->efile.bss_shndx,
885                                                     obj->efile.bss, NULL);
886                 if (err)
887                         return err;
888         }
889         return 0;
890 }
891
892 static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
893 {
894         Elf_Data *symbols = obj->efile.symbols;
895         int i, map_def_sz = 0, nr_maps = 0, nr_syms;
896         Elf_Data *data = NULL;
897         Elf_Scn *scn;
898
899         if (obj->efile.maps_shndx < 0)
900                 return 0;
901
902         if (!symbols)
903                 return -EINVAL;
904
905         scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
906         if (scn)
907                 data = elf_getdata(scn, NULL);
908         if (!scn || !data) {
909                 pr_warning("failed to get Elf_Data from map section %d\n",
910                            obj->efile.maps_shndx);
911                 return -EINVAL;
912         }
913
914         /*
915          * Count number of maps. Each map has a name.
916          * Array of maps is not supported: only the first element is
917          * considered.
918          *
919          * TODO: Detect array of map and report error.
920          */
921         nr_syms = symbols->d_size / sizeof(GElf_Sym);
922         for (i = 0; i < nr_syms; i++) {
923                 GElf_Sym sym;
924
925                 if (!gelf_getsym(symbols, i, &sym))
926                         continue;
927                 if (sym.st_shndx != obj->efile.maps_shndx)
928                         continue;
929                 nr_maps++;
930         }
931         /* Assume equally sized map definitions */
932         pr_debug("maps in %s: %d maps in %zd bytes\n",
933                  obj->path, nr_maps, data->d_size);
934
935         map_def_sz = data->d_size / nr_maps;
936         if (!data->d_size || (data->d_size % nr_maps) != 0) {
937                 pr_warning("unable to determine map definition size "
938                            "section %s, %d maps in %zd bytes\n",
939                            obj->path, nr_maps, data->d_size);
940                 return -EINVAL;
941         }
942
943         /* Fill obj->maps using data in "maps" section.  */
944         for (i = 0; i < nr_syms; i++) {
945                 GElf_Sym sym;
946                 const char *map_name;
947                 struct bpf_map_def *def;
948                 struct bpf_map *map;
949
950                 if (!gelf_getsym(symbols, i, &sym))
951                         continue;
952                 if (sym.st_shndx != obj->efile.maps_shndx)
953                         continue;
954
955                 map = bpf_object__add_map(obj);
956                 if (IS_ERR(map))
957                         return PTR_ERR(map);
958
959                 map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
960                                       sym.st_name);
961                 if (!map_name) {
962                         pr_warning("failed to get map #%d name sym string for obj %s\n",
963                                    i, obj->path);
964                         return -LIBBPF_ERRNO__FORMAT;
965                 }
966
967                 map->libbpf_type = LIBBPF_MAP_UNSPEC;
968                 map->sec_idx = sym.st_shndx;
969                 map->sec_offset = sym.st_value;
970                 pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",
971                          map_name, map->sec_idx, map->sec_offset);
972                 if (sym.st_value + map_def_sz > data->d_size) {
973                         pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
974                                    obj->path, map_name);
975                         return -EINVAL;
976                 }
977
978                 map->name = strdup(map_name);
979                 if (!map->name) {
980                         pr_warning("failed to alloc map name\n");
981                         return -ENOMEM;
982                 }
983                 pr_debug("map %d is \"%s\"\n", i, map->name);
984                 def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
985                 /*
986                  * If the definition of the map in the object file fits in
987                  * bpf_map_def, copy it.  Any extra fields in our version
988                  * of bpf_map_def will default to zero as a result of the
989                  * calloc above.
990                  */
991                 if (map_def_sz <= sizeof(struct bpf_map_def)) {
992                         memcpy(&map->def, def, map_def_sz);
993                 } else {
994                         /*
995                          * Here the map structure being read is bigger than what
996                          * we expect, truncate if the excess bits are all zero.
997                          * If they are not zero, reject this map as
998                          * incompatible.
999                          */
1000                         char *b;
1001                         for (b = ((char *)def) + sizeof(struct bpf_map_def);
1002                              b < ((char *)def) + map_def_sz; b++) {
1003                                 if (*b != 0) {
1004                                         pr_warning("maps section in %s: \"%s\" "
1005                                                    "has unrecognized, non-zero "
1006                                                    "options\n",
1007                                                    obj->path, map_name);
1008                                         if (strict)
1009                                                 return -EINVAL;
1010                                 }
1011                         }
1012                         memcpy(&map->def, def, sizeof(struct bpf_map_def));
1013                 }
1014         }
1015         return 0;
1016 }
1017
1018 static const struct btf_type *skip_mods_and_typedefs(const struct btf *btf,
1019                                                      __u32 id)
1020 {
1021         const struct btf_type *t = btf__type_by_id(btf, id);
1022
1023         while (true) {
1024                 switch (BTF_INFO_KIND(t->info)) {
1025                 case BTF_KIND_VOLATILE:
1026                 case BTF_KIND_CONST:
1027                 case BTF_KIND_RESTRICT:
1028                 case BTF_KIND_TYPEDEF:
1029                         t = btf__type_by_id(btf, t->type);
1030                         break;
1031                 default:
1032                         return t;
1033                 }
1034         }
1035 }
1036
1037 /*
1038  * Fetch integer attribute of BTF map definition. Such attributes are
1039  * represented using a pointer to an array, in which dimensionality of array
1040  * encodes specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
1041  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
1042  * type definition, while using only sizeof(void *) space in ELF data section.
1043  */
1044 static bool get_map_field_int(const char *map_name, const struct btf *btf,
1045                               const struct btf_type *def,
1046                               const struct btf_member *m, __u32 *res) {
1047         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type);
1048         const char *name = btf__name_by_offset(btf, m->name_off);
1049         const struct btf_array *arr_info;
1050         const struct btf_type *arr_t;
1051
1052         if (!btf_is_ptr(t)) {
1053                 pr_warning("map '%s': attr '%s': expected PTR, got %u.\n",
1054                            map_name, name, btf_kind(t));
1055                 return false;
1056         }
1057
1058         arr_t = btf__type_by_id(btf, t->type);
1059         if (!arr_t) {
1060                 pr_warning("map '%s': attr '%s': type [%u] not found.\n",
1061                            map_name, name, t->type);
1062                 return false;
1063         }
1064         if (!btf_is_array(arr_t)) {
1065                 pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n",
1066                            map_name, name, btf_kind(arr_t));
1067                 return false;
1068         }
1069         arr_info = btf_array(arr_t);
1070         *res = arr_info->nelems;
1071         return true;
1072 }
1073
1074 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
1075                                          const struct btf_type *sec,
1076                                          int var_idx, int sec_idx,
1077                                          const Elf_Data *data, bool strict)
1078 {
1079         const struct btf_type *var, *def, *t;
1080         const struct btf_var_secinfo *vi;
1081         const struct btf_var *var_extra;
1082         const struct btf_member *m;
1083         const char *map_name;
1084         struct bpf_map *map;
1085         int vlen, i;
1086
1087         vi = btf_var_secinfos(sec) + var_idx;
1088         var = btf__type_by_id(obj->btf, vi->type);
1089         var_extra = btf_var(var);
1090         map_name = btf__name_by_offset(obj->btf, var->name_off);
1091         vlen = btf_vlen(var);
1092
1093         if (map_name == NULL || map_name[0] == '\0') {
1094                 pr_warning("map #%d: empty name.\n", var_idx);
1095                 return -EINVAL;
1096         }
1097         if ((__u64)vi->offset + vi->size > data->d_size) {
1098                 pr_warning("map '%s' BTF data is corrupted.\n", map_name);
1099                 return -EINVAL;
1100         }
1101         if (!btf_is_var(var)) {
1102                 pr_warning("map '%s': unexpected var kind %u.\n",
1103                            map_name, btf_kind(var));
1104                 return -EINVAL;
1105         }
1106         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
1107             var_extra->linkage != BTF_VAR_STATIC) {
1108                 pr_warning("map '%s': unsupported var linkage %u.\n",
1109                            map_name, var_extra->linkage);
1110                 return -EOPNOTSUPP;
1111         }
1112
1113         def = skip_mods_and_typedefs(obj->btf, var->type);
1114         if (!btf_is_struct(def)) {
1115                 pr_warning("map '%s': unexpected def kind %u.\n",
1116                            map_name, btf_kind(var));
1117                 return -EINVAL;
1118         }
1119         if (def->size > vi->size) {
1120                 pr_warning("map '%s': invalid def size.\n", map_name);
1121                 return -EINVAL;
1122         }
1123
1124         map = bpf_object__add_map(obj);
1125         if (IS_ERR(map))
1126                 return PTR_ERR(map);
1127         map->name = strdup(map_name);
1128         if (!map->name) {
1129                 pr_warning("map '%s': failed to alloc map name.\n", map_name);
1130                 return -ENOMEM;
1131         }
1132         map->libbpf_type = LIBBPF_MAP_UNSPEC;
1133         map->def.type = BPF_MAP_TYPE_UNSPEC;
1134         map->sec_idx = sec_idx;
1135         map->sec_offset = vi->offset;
1136         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
1137                  map_name, map->sec_idx, map->sec_offset);
1138
1139         vlen = btf_vlen(def);
1140         m = btf_members(def);
1141         for (i = 0; i < vlen; i++, m++) {
1142                 const char *name = btf__name_by_offset(obj->btf, m->name_off);
1143
1144                 if (!name) {
1145                         pr_warning("map '%s': invalid field #%d.\n",
1146                                    map_name, i);
1147                         return -EINVAL;
1148                 }
1149                 if (strcmp(name, "type") == 0) {
1150                         if (!get_map_field_int(map_name, obj->btf, def, m,
1151                                                &map->def.type))
1152                                 return -EINVAL;
1153                         pr_debug("map '%s': found type = %u.\n",
1154                                  map_name, map->def.type);
1155                 } else if (strcmp(name, "max_entries") == 0) {
1156                         if (!get_map_field_int(map_name, obj->btf, def, m,
1157                                                &map->def.max_entries))
1158                                 return -EINVAL;
1159                         pr_debug("map '%s': found max_entries = %u.\n",
1160                                  map_name, map->def.max_entries);
1161                 } else if (strcmp(name, "map_flags") == 0) {
1162                         if (!get_map_field_int(map_name, obj->btf, def, m,
1163                                                &map->def.map_flags))
1164                                 return -EINVAL;
1165                         pr_debug("map '%s': found map_flags = %u.\n",
1166                                  map_name, map->def.map_flags);
1167                 } else if (strcmp(name, "key_size") == 0) {
1168                         __u32 sz;
1169
1170                         if (!get_map_field_int(map_name, obj->btf, def, m,
1171                                                &sz))
1172                                 return -EINVAL;
1173                         pr_debug("map '%s': found key_size = %u.\n",
1174                                  map_name, sz);
1175                         if (map->def.key_size && map->def.key_size != sz) {
1176                                 pr_warning("map '%s': conflicting key size %u != %u.\n",
1177                                            map_name, map->def.key_size, sz);
1178                                 return -EINVAL;
1179                         }
1180                         map->def.key_size = sz;
1181                 } else if (strcmp(name, "key") == 0) {
1182                         __s64 sz;
1183
1184                         t = btf__type_by_id(obj->btf, m->type);
1185                         if (!t) {
1186                                 pr_warning("map '%s': key type [%d] not found.\n",
1187                                            map_name, m->type);
1188                                 return -EINVAL;
1189                         }
1190                         if (!btf_is_ptr(t)) {
1191                                 pr_warning("map '%s': key spec is not PTR: %u.\n",
1192                                            map_name, btf_kind(t));
1193                                 return -EINVAL;
1194                         }
1195                         sz = btf__resolve_size(obj->btf, t->type);
1196                         if (sz < 0) {
1197                                 pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n",
1198                                            map_name, t->type, sz);
1199                                 return sz;
1200                         }
1201                         pr_debug("map '%s': found key [%u], sz = %lld.\n",
1202                                  map_name, t->type, sz);
1203                         if (map->def.key_size && map->def.key_size != sz) {
1204                                 pr_warning("map '%s': conflicting key size %u != %lld.\n",
1205                                            map_name, map->def.key_size, sz);
1206                                 return -EINVAL;
1207                         }
1208                         map->def.key_size = sz;
1209                         map->btf_key_type_id = t->type;
1210                 } else if (strcmp(name, "value_size") == 0) {
1211                         __u32 sz;
1212
1213                         if (!get_map_field_int(map_name, obj->btf, def, m,
1214                                                &sz))
1215                                 return -EINVAL;
1216                         pr_debug("map '%s': found value_size = %u.\n",
1217                                  map_name, sz);
1218                         if (map->def.value_size && map->def.value_size != sz) {
1219                                 pr_warning("map '%s': conflicting value size %u != %u.\n",
1220                                            map_name, map->def.value_size, sz);
1221                                 return -EINVAL;
1222                         }
1223                         map->def.value_size = sz;
1224                 } else if (strcmp(name, "value") == 0) {
1225                         __s64 sz;
1226
1227                         t = btf__type_by_id(obj->btf, m->type);
1228                         if (!t) {
1229                                 pr_warning("map '%s': value type [%d] not found.\n",
1230                                            map_name, m->type);
1231                                 return -EINVAL;
1232                         }
1233                         if (!btf_is_ptr(t)) {
1234                                 pr_warning("map '%s': value spec is not PTR: %u.\n",
1235                                            map_name, btf_kind(t));
1236                                 return -EINVAL;
1237                         }
1238                         sz = btf__resolve_size(obj->btf, t->type);
1239                         if (sz < 0) {
1240                                 pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n",
1241                                            map_name, t->type, sz);
1242                                 return sz;
1243                         }
1244                         pr_debug("map '%s': found value [%u], sz = %lld.\n",
1245                                  map_name, t->type, sz);
1246                         if (map->def.value_size && map->def.value_size != sz) {
1247                                 pr_warning("map '%s': conflicting value size %u != %lld.\n",
1248                                            map_name, map->def.value_size, sz);
1249                                 return -EINVAL;
1250                         }
1251                         map->def.value_size = sz;
1252                         map->btf_value_type_id = t->type;
1253                 } else {
1254                         if (strict) {
1255                                 pr_warning("map '%s': unknown field '%s'.\n",
1256                                            map_name, name);
1257                                 return -ENOTSUP;
1258                         }
1259                         pr_debug("map '%s': ignoring unknown field '%s'.\n",
1260                                  map_name, name);
1261                 }
1262         }
1263
1264         if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
1265                 pr_warning("map '%s': map type isn't specified.\n", map_name);
1266                 return -EINVAL;
1267         }
1268
1269         return 0;
1270 }
1271
1272 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)
1273 {
1274         const struct btf_type *sec = NULL;
1275         int nr_types, i, vlen, err;
1276         const struct btf_type *t;
1277         const char *name;
1278         Elf_Data *data;
1279         Elf_Scn *scn;
1280
1281         if (obj->efile.btf_maps_shndx < 0)
1282                 return 0;
1283
1284         scn = elf_getscn(obj->efile.elf, obj->efile.btf_maps_shndx);
1285         if (scn)
1286                 data = elf_getdata(scn, NULL);
1287         if (!scn || !data) {
1288                 pr_warning("failed to get Elf_Data from map section %d (%s)\n",
1289                            obj->efile.maps_shndx, MAPS_ELF_SEC);
1290                 return -EINVAL;
1291         }
1292
1293         nr_types = btf__get_nr_types(obj->btf);
1294         for (i = 1; i <= nr_types; i++) {
1295                 t = btf__type_by_id(obj->btf, i);
1296                 if (!btf_is_datasec(t))
1297                         continue;
1298                 name = btf__name_by_offset(obj->btf, t->name_off);
1299                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
1300                         sec = t;
1301                         break;
1302                 }
1303         }
1304
1305         if (!sec) {
1306                 pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
1307                 return -ENOENT;
1308         }
1309
1310         vlen = btf_vlen(sec);
1311         for (i = 0; i < vlen; i++) {
1312                 err = bpf_object__init_user_btf_map(obj, sec, i,
1313                                                     obj->efile.btf_maps_shndx,
1314                                                     data, strict);
1315                 if (err)
1316                         return err;
1317         }
1318
1319         return 0;
1320 }
1321
1322 static int bpf_object__init_maps(struct bpf_object *obj, int flags)
1323 {
1324         bool strict = !(flags & MAPS_RELAX_COMPAT);
1325         int err;
1326
1327         err = bpf_object__init_user_maps(obj, strict);
1328         if (err)
1329                 return err;
1330
1331         err = bpf_object__init_user_btf_maps(obj, strict);
1332         if (err)
1333                 return err;
1334
1335         err = bpf_object__init_global_data_maps(obj);
1336         if (err)
1337                 return err;
1338
1339         if (obj->nr_maps) {
1340                 qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]),
1341                       compare_bpf_map);
1342         }
1343         return 0;
1344 }
1345
1346 static bool section_have_execinstr(struct bpf_object *obj, int idx)
1347 {
1348         Elf_Scn *scn;
1349         GElf_Shdr sh;
1350
1351         scn = elf_getscn(obj->efile.elf, idx);
1352         if (!scn)
1353                 return false;
1354
1355         if (gelf_getshdr(scn, &sh) != &sh)
1356                 return false;
1357
1358         if (sh.sh_flags & SHF_EXECINSTR)
1359                 return true;
1360
1361         return false;
1362 }
1363
/*
 * Rewrite, in place, BTF types of kinds that are flagged as unavailable in
 * obj->caps into other kinds:
 *   - without caps.btf_datasec: VAR -> INT, DATASEC -> STRUCT
 *   - without caps.btf_func:    FUNC_PROTO -> ENUM, FUNC -> TYPEDEF
 * Nothing is done when there is no BTF or when both capabilities are set.
 * NOTE(review): the caps flags presumably reflect what the running kernel
 * supports - confirm where obj->caps is filled in.
 */
static void bpf_object__sanitize_btf(struct bpf_object *obj)
{
        bool has_datasec = obj->caps.btf_datasec;
        bool has_func = obj->caps.btf_func;
        struct btf *btf = obj->btf;
        struct btf_type *t;
        int i, j, vlen;

        if (!obj->btf || (has_func && has_datasec))
                return;

        /* type ids are visited from 1 to btf__get_nr_types() inclusive */
        for (i = 1; i <= btf__get_nr_types(btf); i++) {
                /* const is cast away: types are patched in place */
                t = (struct btf_type *)btf__type_by_id(btf, i);

                if (!has_datasec && btf_is_var(t)) {
                        /* replace VAR with INT */
                        t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
                        t->size = sizeof(int);
                        /* overwrite the word right after the type header */
                        *(int *)(t + 1) = BTF_INT_ENC(0, 0, 32);
                } else if (!has_datasec && btf_is_datasec(t)) {
                        /* replace DATASEC with STRUCT */
                        const struct btf_var_secinfo *v = btf_var_secinfos(t);
                        struct btf_member *m = btf_members(t);
                        struct btf_type *vt;
                        char *name;

                        /* turn every '.' in the section name into '_' */
                        name = (char *)btf__name_by_offset(btf, t->name_off);
                        while (*name) {
                                if (*name == '.')
                                        *name = '_';
                                name++;
                        }

                        vlen = btf_vlen(t);
                        t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
                        /*
                         * v and m are both derived from t, so each member is
                         * written over the secinfo record it is built from.
                         * NOTE(review): presumably the two records share the
                         * same storage, which is why fields are copied in
                         * this exact order - confirm against struct layouts.
                         */
                        for (j = 0; j < vlen; j++, v++, m++) {
                                /* order of field assignments is important */
                                m->offset = v->offset * 8;
                                m->type = v->type;
                                /* preserve variable name as member name */
                                vt = (void *)btf__type_by_id(btf, v->type);
                                m->name_off = vt->name_off;
                        }
                } else if (!has_func && btf_is_func_proto(t)) {
                        /* replace FUNC_PROTO with ENUM */
                        vlen = btf_vlen(t);
                        t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
                        t->size = sizeof(__u32); /* kernel enforced */
                } else if (!has_func && btf_is_func(t)) {
                        /* replace FUNC with TYPEDEF */
                        t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
                }
        }
}
1418
1419 static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
1420 {
1421         if (!obj->btf_ext)
1422                 return;
1423
1424         if (!obj->caps.btf_func) {
1425                 btf_ext__free(obj->btf_ext);
1426                 obj->btf_ext = NULL;
1427         }
1428 }
1429
1430 static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
1431 {
1432         return obj->efile.btf_maps_shndx >= 0;
1433 }
1434
1435 static int bpf_object__init_btf(struct bpf_object *obj,
1436                                 Elf_Data *btf_data,
1437                                 Elf_Data *btf_ext_data)
1438 {
1439         bool btf_required = bpf_object__is_btf_mandatory(obj);
1440         int err = 0;
1441
1442         if (btf_data) {
1443                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
1444                 if (IS_ERR(obj->btf)) {
1445                         pr_warning("Error loading ELF section %s: %d.\n",
1446                                    BTF_ELF_SEC, err);
1447                         goto out;
1448                 }
1449                 err = btf__finalize_data(obj, obj->btf);
1450                 if (err) {
1451                         pr_warning("Error finalizing %s: %d.\n",
1452                                    BTF_ELF_SEC, err);
1453                         goto out;
1454                 }
1455         }
1456         if (btf_ext_data) {
1457                 if (!obj->btf) {
1458                         pr_debug("Ignore ELF section %s because its depending ELF section %s is not found.\n",
1459                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
1460                         goto out;
1461                 }
1462                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,
1463                                             btf_ext_data->d_size);
1464                 if (IS_ERR(obj->btf_ext)) {
1465                         pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n",
1466                                    BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));
1467                         obj->btf_ext = NULL;
1468                         goto out;
1469                 }
1470         }
1471 out:
1472         if (err || IS_ERR(obj->btf)) {
1473                 if (btf_required)
1474                         err = err ? : PTR_ERR(obj->btf);
1475                 else
1476                         err = 0;
1477                 if (!IS_ERR_OR_NULL(obj->btf))
1478                         btf__free(obj->btf);
1479                 obj->btf = NULL;
1480         }
1481         if (btf_required && !obj->btf) {
1482                 pr_warning("BTF is required, but is missing or corrupted.\n");
1483                 return err == 0 ? -ENOENT : err;
1484         }
1485         return 0;
1486 }
1487
1488 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
1489 {
1490         int err = 0;
1491
1492         if (!obj->btf)
1493                 return 0;
1494
1495         bpf_object__sanitize_btf(obj);
1496         bpf_object__sanitize_btf_ext(obj);
1497
1498         err = btf__load(obj->btf);
1499         if (err) {
1500                 pr_warning("Error loading %s into kernel: %d.\n",
1501                            BTF_ELF_SEC, err);
1502                 btf__free(obj->btf);
1503                 obj->btf = NULL;
1504                 if (bpf_object__is_btf_mandatory(obj))
1505                         return err;
1506         }
1507         return 0;
1508 }
1509
1510 static int bpf_object__elf_collect(struct bpf_object *obj, int flags)
1511 {
1512         Elf *elf = obj->efile.elf;
1513         GElf_Ehdr *ep = &obj->efile.ehdr;
1514         Elf_Data *btf_ext_data = NULL;
1515         Elf_Data *btf_data = NULL;
1516         Elf_Scn *scn = NULL;
1517         int idx = 0, err = 0;
1518
1519         /* Elf is corrupted/truncated, avoid calling elf_strptr. */
1520         if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
1521                 pr_warning("failed to get e_shstrndx from %s\n", obj->path);
1522                 return -LIBBPF_ERRNO__FORMAT;
1523         }
1524
1525         while ((scn = elf_nextscn(elf, scn)) != NULL) {
1526                 char *name;
1527                 GElf_Shdr sh;
1528                 Elf_Data *data;
1529
1530                 idx++;
1531                 if (gelf_getshdr(scn, &sh) != &sh) {
1532                         pr_warning("failed to get section(%d) header from %s\n",
1533                                    idx, obj->path);
1534                         return -LIBBPF_ERRNO__FORMAT;
1535                 }
1536
1537                 name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
1538                 if (!name) {
1539                         pr_warning("failed to get section(%d) name from %s\n",
1540                                    idx, obj->path);
1541                         return -LIBBPF_ERRNO__FORMAT;
1542                 }
1543
1544                 data = elf_getdata(scn, 0);
1545                 if (!data) {
1546                         pr_warning("failed to get section(%d) data from %s(%s)\n",
1547                                    idx, name, obj->path);
1548                         return -LIBBPF_ERRNO__FORMAT;
1549                 }
1550                 pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
1551                          idx, name, (unsigned long)data->d_size,
1552                          (int)sh.sh_link, (unsigned long)sh.sh_flags,
1553                          (int)sh.sh_type);
1554
1555                 if (strcmp(name, "license") == 0) {
1556                         err = bpf_object__init_license(obj,
1557                                                        data->d_buf,
1558                                                        data->d_size);
1559                         if (err)
1560                                 return err;
1561                 } else if (strcmp(name, "version") == 0) {
1562                         err = bpf_object__init_kversion(obj,
1563                                                         data->d_buf,
1564                                                         data->d_size);
1565                         if (err)
1566                                 return err;
1567                 } else if (strcmp(name, "maps") == 0) {
1568                         obj->efile.maps_shndx = idx;
1569                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
1570                         obj->efile.btf_maps_shndx = idx;
1571                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
1572                         btf_data = data;
1573                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
1574                         btf_ext_data = data;
1575                 } else if (sh.sh_type == SHT_SYMTAB) {
1576                         if (obj->efile.symbols) {
1577                                 pr_warning("bpf: multiple SYMTAB in %s\n",
1578                                            obj->path);
1579                                 return -LIBBPF_ERRNO__FORMAT;
1580                         }
1581                         obj->efile.symbols = data;
1582                         obj->efile.strtabidx = sh.sh_link;
1583                 } else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) {
1584                         if (sh.sh_flags & SHF_EXECINSTR) {
1585                                 if (strcmp(name, ".text") == 0)
1586                                         obj->efile.text_shndx = idx;
1587                                 err = bpf_object__add_program(obj, data->d_buf,
1588                                                               data->d_size, name, idx);
1589                                 if (err) {
1590                                         char errmsg[STRERR_BUFSIZE];
1591                                         char *cp = libbpf_strerror_r(-err, errmsg,
1592                                                                      sizeof(errmsg));
1593
1594                                         pr_warning("failed to alloc program %s (%s): %s",
1595                                                    name, obj->path, cp);
1596                                         return err;
1597                                 }
1598                         } else if (strcmp(name, ".data") == 0) {
1599                                 obj->efile.data = data;
1600                                 obj->efile.data_shndx = idx;
1601                         } else if (strcmp(name, ".rodata") == 0) {
1602                                 obj->efile.rodata = data;
1603                                 obj->efile.rodata_shndx = idx;
1604                         } else {
1605                                 pr_debug("skip section(%d) %s\n", idx, name);
1606                         }
1607                 } else if (sh.sh_type == SHT_REL) {
1608                         int nr_reloc = obj->efile.nr_reloc;
1609                         void *reloc = obj->efile.reloc;
1610                         int sec = sh.sh_info; /* points to other section */
1611
1612                         /* Only do relo for section with exec instructions */
1613                         if (!section_have_execinstr(obj, sec)) {
1614                                 pr_debug("skip relo %s(%d) for section(%d)\n",
1615                                          name, idx, sec);
1616                                 continue;
1617                         }
1618
1619                         reloc = reallocarray(reloc, nr_reloc + 1,
1620                                              sizeof(*obj->efile.reloc));
1621                         if (!reloc) {
1622                                 pr_warning("realloc failed\n");
1623                                 return -ENOMEM;
1624                         }
1625
1626                         obj->efile.reloc = reloc;
1627                         obj->efile.nr_reloc++;
1628
1629                         obj->efile.reloc[nr_reloc].shdr = sh;
1630                         obj->efile.reloc[nr_reloc].data = data;
1631                 } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {
1632                         obj->efile.bss = data;
1633                         obj->efile.bss_shndx = idx;
1634                 } else {
1635                         pr_debug("skip section(%d) %s\n", idx, name);
1636                 }
1637         }
1638
1639         if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
1640                 pr_warning("Corrupted ELF file: index of strtab invalid\n");
1641                 return -LIBBPF_ERRNO__FORMAT;
1642         }
1643         err = bpf_object__init_btf(obj, btf_data, btf_ext_data);
1644         if (!err)
1645                 err = bpf_object__init_maps(obj, flags);
1646         if (!err)
1647                 err = bpf_object__sanitize_and_load_btf(obj);
1648         if (!err)
1649                 err = bpf_object__init_prog_names(obj);
1650         return err;
1651 }
1652
1653 static struct bpf_program *
1654 bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
1655 {
1656         struct bpf_program *prog;
1657         size_t i;
1658
1659         for (i = 0; i < obj->nr_programs; i++) {
1660                 prog = &obj->programs[i];
1661                 if (prog->idx == idx)
1662                         return prog;
1663         }
1664         return NULL;
1665 }
1666
1667 struct bpf_program *
1668 bpf_object__find_program_by_title(const struct bpf_object *obj,
1669                                   const char *title)
1670 {
1671         struct bpf_program *pos;
1672
1673         bpf_object__for_each_program(pos, obj) {
1674                 if (pos->section_name && !strcmp(pos->section_name, title))
1675                         return pos;
1676         }
1677         return NULL;
1678 }
1679
1680 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
1681                                       int shndx)
1682 {
1683         return shndx == obj->efile.data_shndx ||
1684                shndx == obj->efile.bss_shndx ||
1685                shndx == obj->efile.rodata_shndx;
1686 }
1687
1688 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
1689                                       int shndx)
1690 {
1691         return shndx == obj->efile.maps_shndx ||
1692                shndx == obj->efile.btf_maps_shndx;
1693 }
1694
1695 static bool bpf_object__relo_in_known_section(const struct bpf_object *obj,
1696                                               int shndx)
1697 {
1698         return shndx == obj->efile.text_shndx ||
1699                bpf_object__shndx_is_maps(obj, shndx) ||
1700                bpf_object__shndx_is_data(obj, shndx);
1701 }
1702
1703 static enum libbpf_map_type
1704 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
1705 {
1706         if (shndx == obj->efile.data_shndx)
1707                 return LIBBPF_MAP_DATA;
1708         else if (shndx == obj->efile.bss_shndx)
1709                 return LIBBPF_MAP_BSS;
1710         else if (shndx == obj->efile.rodata_shndx)
1711                 return LIBBPF_MAP_RODATA;
1712         else
1713                 return LIBBPF_MAP_UNSPEC;
1714 }
1715
1716 static int
1717 bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
1718                            Elf_Data *data, struct bpf_object *obj)
1719 {
1720         Elf_Data *symbols = obj->efile.symbols;
1721         struct bpf_map *maps = obj->maps;
1722         size_t nr_maps = obj->nr_maps;
1723         int i, nrels;
1724
1725         pr_debug("collecting relocating info for: '%s'\n", prog->section_name);
1726         nrels = shdr->sh_size / shdr->sh_entsize;
1727
1728         prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
1729         if (!prog->reloc_desc) {
1730                 pr_warning("failed to alloc memory in relocation\n");
1731                 return -ENOMEM;
1732         }
1733         prog->nr_reloc = nrels;
1734
1735         for (i = 0; i < nrels; i++) {
1736                 struct bpf_insn *insns = prog->insns;
1737                 enum libbpf_map_type type;
1738                 unsigned int insn_idx;
1739                 unsigned int shdr_idx;
1740                 const char *name;
1741                 size_t map_idx;
1742                 GElf_Sym sym;
1743                 GElf_Rel rel;
1744
1745                 if (!gelf_getrel(data, i, &rel)) {
1746                         pr_warning("relocation: failed to get %d reloc\n", i);
1747                         return -LIBBPF_ERRNO__FORMAT;
1748                 }
1749
1750                 if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
1751                         pr_warning("relocation: symbol %"PRIx64" not found\n",
1752                                    GELF_R_SYM(rel.r_info));
1753                         return -LIBBPF_ERRNO__FORMAT;
1754                 }
1755
1756                 name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
1757                                   sym.st_name) ? : "<?>";
1758
1759                 pr_debug("relo for %lld value %lld name %d (\'%s\')\n",
1760                          (long long) (rel.r_info >> 32),
1761                          (long long) sym.st_value, sym.st_name, name);
1762
1763                 shdr_idx = sym.st_shndx;
1764                 insn_idx = rel.r_offset / sizeof(struct bpf_insn);
1765                 pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n",
1766                          insn_idx, shdr_idx);
1767
1768                 if (shdr_idx >= SHN_LORESERVE) {
1769                         pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n",
1770                                    name, shdr_idx, insn_idx,
1771                                    insns[insn_idx].code);
1772                         return -LIBBPF_ERRNO__RELOC;
1773                 }
1774                 if (!bpf_object__relo_in_known_section(obj, shdr_idx)) {
1775                         pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n",
1776                                    prog->section_name, shdr_idx);
1777                         return -LIBBPF_ERRNO__RELOC;
1778                 }
1779
1780                 if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
1781                         if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
1782                                 pr_warning("incorrect bpf_call opcode\n");
1783                                 return -LIBBPF_ERRNO__RELOC;
1784                         }
1785                         prog->reloc_desc[i].type = RELO_CALL;
1786                         prog->reloc_desc[i].insn_idx = insn_idx;
1787                         prog->reloc_desc[i].text_off = sym.st_value;
1788                         obj->has_pseudo_calls = true;
1789                         continue;
1790                 }
1791
1792                 if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
1793                         pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
1794                                    insn_idx, insns[insn_idx].code);
1795                         return -LIBBPF_ERRNO__RELOC;
1796                 }
1797
1798                 if (bpf_object__shndx_is_maps(obj, shdr_idx) ||
1799                     bpf_object__shndx_is_data(obj, shdr_idx)) {
1800                         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
1801                         if (type != LIBBPF_MAP_UNSPEC) {
1802                                 if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL) {
1803                                         pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n",
1804                                                    name, insn_idx, insns[insn_idx].code);
1805                                         return -LIBBPF_ERRNO__RELOC;
1806                                 }
1807                                 if (!obj->caps.global_data) {
1808                                         pr_warning("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n",
1809                                                    name, insn_idx);
1810                                         return -LIBBPF_ERRNO__RELOC;
1811                                 }
1812                         }
1813
1814                         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
1815                                 if (maps[map_idx].libbpf_type != type)
1816                                         continue;
1817                                 if (type != LIBBPF_MAP_UNSPEC ||
1818                                     (maps[map_idx].sec_idx == sym.st_shndx &&
1819                                      maps[map_idx].sec_offset == sym.st_value)) {
1820                                         pr_debug("relocation: found map %zd (%s, sec_idx %d, offset %zu) for insn %u\n",
1821                                                  map_idx, maps[map_idx].name,
1822                                                  maps[map_idx].sec_idx,
1823                                                  maps[map_idx].sec_offset,
1824                                                  insn_idx);
1825                                         break;
1826                                 }
1827                         }
1828
1829                         if (map_idx >= nr_maps) {
1830                                 pr_warning("bpf relocation: map_idx %d larger than %d\n",
1831                                            (int)map_idx, (int)nr_maps - 1);
1832                                 return -LIBBPF_ERRNO__RELOC;
1833                         }
1834
1835                         prog->reloc_desc[i].type = type != LIBBPF_MAP_UNSPEC ?
1836                                                    RELO_DATA : RELO_LD64;
1837                         prog->reloc_desc[i].insn_idx = insn_idx;
1838                         prog->reloc_desc[i].map_idx = map_idx;
1839                 }
1840         }
1841         return 0;
1842 }
1843
1844 static int bpf_map_find_btf_info(struct bpf_object *obj, struct bpf_map *map)
1845 {
1846         struct bpf_map_def *def = &map->def;
1847         __u32 key_type_id = 0, value_type_id = 0;
1848         int ret;
1849
1850         /* if it's BTF-defined map, we don't need to search for type IDs */
1851         if (map->sec_idx == obj->efile.btf_maps_shndx)
1852                 return 0;
1853
1854         if (!bpf_map__is_internal(map)) {
1855                 ret = btf__get_map_kv_tids(obj->btf, map->name, def->key_size,
1856                                            def->value_size, &key_type_id,
1857                                            &value_type_id);
1858         } else {
1859                 /*
1860                  * LLVM annotates global data differently in BTF, that is,
1861                  * only as '.data', '.bss' or '.rodata'.
1862                  */
1863                 ret = btf__find_by_name(obj->btf,
1864                                 libbpf_type_to_btf_name[map->libbpf_type]);
1865         }
1866         if (ret < 0)
1867                 return ret;
1868
1869         map->btf_key_type_id = key_type_id;
1870         map->btf_value_type_id = bpf_map__is_internal(map) ?
1871                                  ret : value_type_id;
1872         return 0;
1873 }
1874
1875 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
1876 {
1877         struct bpf_map_info info = {};
1878         __u32 len = sizeof(info);
1879         int new_fd, err;
1880         char *new_name;
1881
1882         err = bpf_obj_get_info_by_fd(fd, &info, &len);
1883         if (err)
1884                 return err;
1885
1886         new_name = strdup(info.name);
1887         if (!new_name)
1888                 return -errno;
1889
1890         new_fd = open("/", O_RDONLY | O_CLOEXEC);
1891         if (new_fd < 0)
1892                 goto err_free_new_name;
1893
1894         new_fd = dup3(fd, new_fd, O_CLOEXEC);
1895         if (new_fd < 0)
1896                 goto err_close_new_fd;
1897
1898         err = zclose(map->fd);
1899         if (err)
1900                 goto err_close_new_fd;
1901         free(map->name);
1902
1903         map->fd = new_fd;
1904         map->name = new_name;
1905         map->def.type = info.type;
1906         map->def.key_size = info.key_size;
1907         map->def.value_size = info.value_size;
1908         map->def.max_entries = info.max_entries;
1909         map->def.map_flags = info.map_flags;
1910         map->btf_key_type_id = info.btf_key_type_id;
1911         map->btf_value_type_id = info.btf_value_type_id;
1912
1913         return 0;
1914
1915 err_close_new_fd:
1916         close(new_fd);
1917 err_free_new_name:
1918         free(new_name);
1919         return -errno;
1920 }
1921
1922 int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
1923 {
1924         if (!map || !max_entries)
1925                 return -EINVAL;
1926
1927         /* If map already created, its attributes can't be changed. */
1928         if (map->fd >= 0)
1929                 return -EBUSY;
1930
1931         map->def.max_entries = max_entries;
1932
1933         return 0;
1934 }
1935
1936 static int
1937 bpf_object__probe_name(struct bpf_object *obj)
1938 {
1939         struct bpf_load_program_attr attr;
1940         char *cp, errmsg[STRERR_BUFSIZE];
1941         struct bpf_insn insns[] = {
1942                 BPF_MOV64_IMM(BPF_REG_0, 0),
1943                 BPF_EXIT_INSN(),
1944         };
1945         int ret;
1946
1947         /* make sure basic loading works */
1948
1949         memset(&attr, 0, sizeof(attr));
1950         attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
1951         attr.insns = insns;
1952         attr.insns_cnt = ARRAY_SIZE(insns);
1953         attr.license = "GPL";
1954
1955         ret = bpf_load_program_xattr(&attr, NULL, 0);
1956         if (ret < 0) {
1957                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
1958                 pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
1959                            __func__, cp, errno);
1960                 return -errno;
1961         }
1962         close(ret);
1963
1964         /* now try the same program, but with the name */
1965
1966         attr.name = "test";
1967         ret = bpf_load_program_xattr(&attr, NULL, 0);
1968         if (ret >= 0) {
1969                 obj->caps.name = 1;
1970                 close(ret);
1971         }
1972
1973         return 0;
1974 }
1975
1976 static int
1977 bpf_object__probe_global_data(struct bpf_object *obj)
1978 {
1979         struct bpf_load_program_attr prg_attr;
1980         struct bpf_create_map_attr map_attr;
1981         char *cp, errmsg[STRERR_BUFSIZE];
1982         struct bpf_insn insns[] = {
1983                 BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16),
1984                 BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
1985                 BPF_MOV64_IMM(BPF_REG_0, 0),
1986                 BPF_EXIT_INSN(),
1987         };
1988         int ret, map;
1989
1990         memset(&map_attr, 0, sizeof(map_attr));
1991         map_attr.map_type = BPF_MAP_TYPE_ARRAY;
1992         map_attr.key_size = sizeof(int);
1993         map_attr.value_size = 32;
1994         map_attr.max_entries = 1;
1995
1996         map = bpf_create_map_xattr(&map_attr);
1997         if (map < 0) {
1998                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
1999                 pr_warning("Error in %s():%s(%d). Couldn't create simple array map.\n",
2000                            __func__, cp, errno);
2001                 return -errno;
2002         }
2003
2004         insns[0].imm = map;
2005
2006         memset(&prg_attr, 0, sizeof(prg_attr));
2007         prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
2008         prg_attr.insns = insns;
2009         prg_attr.insns_cnt = ARRAY_SIZE(insns);
2010         prg_attr.license = "GPL";
2011
2012         ret = bpf_load_program_xattr(&prg_attr, NULL, 0);
2013         if (ret >= 0) {
2014                 obj->caps.global_data = 1;
2015                 close(ret);
2016         }
2017
2018         close(map);
2019         return 0;
2020 }
2021
2022 static int bpf_object__probe_btf_func(struct bpf_object *obj)
2023 {
2024         const char strs[] = "\0int\0x\0a";
2025         /* void x(int a) {} */
2026         __u32 types[] = {
2027                 /* int */
2028                 BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
2029                 /* FUNC_PROTO */                                /* [2] */
2030                 BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0),
2031                 BTF_PARAM_ENC(7, 1),
2032                 /* FUNC x */                                    /* [3] */
2033                 BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2),
2034         };
2035         int btf_fd;
2036
2037         btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
2038                                       strs, sizeof(strs));
2039         if (btf_fd >= 0) {
2040                 obj->caps.btf_func = 1;
2041                 close(btf_fd);
2042                 return 1;
2043         }
2044
2045         return 0;
2046 }
2047
2048 static int bpf_object__probe_btf_datasec(struct bpf_object *obj)
2049 {
2050         const char strs[] = "\0x\0.data";
2051         /* static int a; */
2052         __u32 types[] = {
2053                 /* int */
2054                 BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
2055                 /* VAR x */                                     /* [2] */
2056                 BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
2057                 BTF_VAR_STATIC,
2058                 /* DATASEC val */                               /* [3] */
2059                 BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
2060                 BTF_VAR_SECINFO_ENC(2, 0, 4),
2061         };
2062         int btf_fd;
2063
2064         btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types),
2065                                       strs, sizeof(strs));
2066         if (btf_fd >= 0) {
2067                 obj->caps.btf_datasec = 1;
2068                 close(btf_fd);
2069                 return 1;
2070         }
2071
2072         return 0;
2073 }
2074
2075 static int
2076 bpf_object__probe_caps(struct bpf_object *obj)
2077 {
2078         int (*probe_fn[])(struct bpf_object *obj) = {
2079                 bpf_object__probe_name,
2080                 bpf_object__probe_global_data,
2081                 bpf_object__probe_btf_func,
2082                 bpf_object__probe_btf_datasec,
2083         };
2084         int i, ret;
2085
2086         for (i = 0; i < ARRAY_SIZE(probe_fn); i++) {
2087                 ret = probe_fn[i](obj);
2088                 if (ret < 0)
2089                         pr_debug("Probe #%d failed with %d.\n", i, ret);
2090         }
2091
2092         return 0;
2093 }
2094
2095 static int
2096 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
2097 {
2098         char *cp, errmsg[STRERR_BUFSIZE];
2099         int err, zero = 0;
2100         __u8 *data;
2101
2102         /* Nothing to do here since kernel already zero-initializes .bss map. */
2103         if (map->libbpf_type == LIBBPF_MAP_BSS)
2104                 return 0;
2105
2106         data = map->libbpf_type == LIBBPF_MAP_DATA ?
2107                obj->sections.data : obj->sections.rodata;
2108
2109         err = bpf_map_update_elem(map->fd, &zero, data, 0);
2110         /* Freeze .rodata map as read-only from syscall side. */
2111         if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) {
2112                 err = bpf_map_freeze(map->fd);
2113                 if (err) {
2114                         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
2115                         pr_warning("Error freezing map(%s) as read-only: %s\n",
2116                                    map->name, cp);
2117                         err = 0;
2118                 }
2119         }
2120         return err;
2121 }
2122
2123 static int
2124 bpf_object__create_maps(struct bpf_object *obj)
2125 {
2126         struct bpf_create_map_attr create_attr = {};
2127         int nr_cpus = 0;
2128         unsigned int i;
2129         int err;
2130
2131         for (i = 0; i < obj->nr_maps; i++) {
2132                 struct bpf_map *map = &obj->maps[i];
2133                 struct bpf_map_def *def = &map->def;
2134                 char *cp, errmsg[STRERR_BUFSIZE];
2135                 int *pfd = &map->fd;
2136
2137                 if (map->fd >= 0) {
2138                         pr_debug("skip map create (preset) %s: fd=%d\n",
2139                                  map->name, map->fd);
2140                         continue;
2141                 }
2142
2143                 if (obj->caps.name)
2144                         create_attr.name = map->name;
2145                 create_attr.map_ifindex = map->map_ifindex;
2146                 create_attr.map_type = def->type;
2147                 create_attr.map_flags = def->map_flags;
2148                 create_attr.key_size = def->key_size;
2149                 create_attr.value_size = def->value_size;
2150                 if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
2151                     !def->max_entries) {
2152                         if (!nr_cpus)
2153                                 nr_cpus = libbpf_num_possible_cpus();
2154                         if (nr_cpus < 0) {
2155                                 pr_warning("failed to determine number of system CPUs: %d\n",
2156                                            nr_cpus);
2157                                 err = nr_cpus;
2158                                 goto err_out;
2159                         }
2160                         pr_debug("map '%s': setting size to %d\n",
2161                                  map->name, nr_cpus);
2162                         create_attr.max_entries = nr_cpus;
2163                 } else {
2164                         create_attr.max_entries = def->max_entries;
2165                 }
2166                 create_attr.btf_fd = 0;
2167                 create_attr.btf_key_type_id = 0;
2168                 create_attr.btf_value_type_id = 0;
2169                 if (bpf_map_type__is_map_in_map(def->type) &&
2170                     map->inner_map_fd >= 0)
2171                         create_attr.inner_map_fd = map->inner_map_fd;
2172
2173                 if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
2174                         create_attr.btf_fd = btf__fd(obj->btf);
2175                         create_attr.btf_key_type_id = map->btf_key_type_id;
2176                         create_attr.btf_value_type_id = map->btf_value_type_id;
2177                 }
2178
2179                 *pfd = bpf_create_map_xattr(&create_attr);
2180                 if (*pfd < 0 && (create_attr.btf_key_type_id ||
2181                                  create_attr.btf_value_type_id)) {
2182                         err = -errno;
2183                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
2184                         pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
2185                                    map->name, cp, err);
2186                         create_attr.btf_fd = 0;
2187                         create_attr.btf_key_type_id = 0;
2188                         create_attr.btf_value_type_id = 0;
2189                         map->btf_key_type_id = 0;
2190                         map->btf_value_type_id = 0;
2191                         *pfd = bpf_create_map_xattr(&create_attr);
2192                 }
2193
2194                 if (*pfd < 0) {
2195                         size_t j;
2196
2197                         err = -errno;
2198 err_out:
2199                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
2200                         pr_warning("failed to create map (name: '%s'): %s(%d)\n",
2201                                    map->name, cp, err);
2202                         for (j = 0; j < i; j++)
2203                                 zclose(obj->maps[j].fd);
2204                         return err;
2205                 }
2206
2207                 if (bpf_map__is_internal(map)) {
2208                         err = bpf_object__populate_internal_map(obj, map);
2209                         if (err < 0) {
2210                                 zclose(*pfd);
2211                                 goto err_out;
2212                         }
2213                 }
2214
2215                 pr_debug("created map %s: fd=%d\n", map->name, *pfd);
2216         }
2217
2218         return 0;
2219 }
2220
2221 static int
2222 check_btf_ext_reloc_err(struct bpf_program *prog, int err,
2223                         void *btf_prog_info, const char *info_name)
2224 {
2225         if (err != -ENOENT) {
2226                 pr_warning("Error in loading %s for sec %s.\n",
2227                            info_name, prog->section_name);
2228                 return err;
2229         }
2230
2231         /* err == -ENOENT (i.e. prog->section_name not found in btf_ext) */
2232
2233         if (btf_prog_info) {
2234                 /*
2235                  * Some info has already been found but has problem
2236                  * in the last btf_ext reloc. Must have to error out.
2237                  */
2238                 pr_warning("Error in relocating %s for sec %s.\n",
2239                            info_name, prog->section_name);
2240                 return err;
2241         }
2242
2243         /* Have problem loading the very first info. Ignore the rest. */
2244         pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n",
2245                    info_name, prog->section_name, info_name);
2246         return 0;
2247 }
2248
2249 static int
2250 bpf_program_reloc_btf_ext(struct bpf_program *prog, struct bpf_object *obj,
2251                           const char *section_name,  __u32 insn_offset)
2252 {
2253         int err;
2254
2255         if (!insn_offset || prog->func_info) {
2256                 /*
2257                  * !insn_offset => main program
2258                  *
2259                  * For sub prog, the main program's func_info has to
2260                  * be loaded first (i.e. prog->func_info != NULL)
2261                  */
2262                 err = btf_ext__reloc_func_info(obj->btf, obj->btf_ext,
2263                                                section_name, insn_offset,
2264                                                &prog->func_info,
2265                                                &prog->func_info_cnt);
2266                 if (err)
2267                         return check_btf_ext_reloc_err(prog, err,
2268                                                        prog->func_info,
2269                                                        "bpf_func_info");
2270
2271                 prog->func_info_rec_size = btf_ext__func_info_rec_size(obj->btf_ext);
2272         }
2273
2274         if (!insn_offset || prog->line_info) {
2275                 err = btf_ext__reloc_line_info(obj->btf, obj->btf_ext,
2276                                                section_name, insn_offset,
2277                                                &prog->line_info,
2278                                                &prog->line_info_cnt);
2279                 if (err)
2280                         return check_btf_ext_reloc_err(prog, err,
2281                                                        prog->line_info,
2282                                                        "bpf_line_info");
2283
2284                 prog->line_info_rec_size = btf_ext__line_info_rec_size(obj->btf_ext);
2285         }
2286
2287         if (!insn_offset)
2288                 prog->btf_fd = btf__fd(obj->btf);
2289
2290         return 0;
2291 }
2292
2293 static int
2294 bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
2295                         struct reloc_desc *relo)
2296 {
2297         struct bpf_insn *insn, *new_insn;
2298         struct bpf_program *text;
2299         size_t new_cnt;
2300         int err;
2301
2302         if (relo->type != RELO_CALL)
2303                 return -LIBBPF_ERRNO__RELOC;
2304
2305         if (prog->idx == obj->efile.text_shndx) {
2306                 pr_warning("relo in .text insn %d into off %d\n",
2307                            relo->insn_idx, relo->text_off);
2308                 return -LIBBPF_ERRNO__RELOC;
2309         }
2310
2311         if (prog->main_prog_cnt == 0) {
2312                 text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
2313                 if (!text) {
2314                         pr_warning("no .text section found yet relo into text exist\n");
2315                         return -LIBBPF_ERRNO__RELOC;
2316                 }
2317                 new_cnt = prog->insns_cnt + text->insns_cnt;
2318                 new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
2319                 if (!new_insn) {
2320                         pr_warning("oom in prog realloc\n");
2321                         return -ENOMEM;
2322                 }
2323
2324                 if (obj->btf_ext) {
2325                         err = bpf_program_reloc_btf_ext(prog, obj,
2326                                                         text->section_name,
2327                                                         prog->insns_cnt);
2328                         if (err)
2329                                 return err;
2330                 }
2331
2332                 memcpy(new_insn + prog->insns_cnt, text->insns,
2333                        text->insns_cnt * sizeof(*insn));
2334                 prog->insns = new_insn;
2335                 prog->main_prog_cnt = prog->insns_cnt;
2336                 prog->insns_cnt = new_cnt;
2337                 pr_debug("added %zd insn from %s to prog %s\n",
2338                          text->insns_cnt, text->section_name,
2339                          prog->section_name);
2340         }
2341         insn = &prog->insns[relo->insn_idx];
2342         insn->imm += prog->main_prog_cnt - relo->insn_idx;
2343         return 0;
2344 }
2345
2346 static int
2347 bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
2348 {
2349         int i, err;
2350
2351         if (!prog)
2352                 return 0;
2353
2354         if (obj->btf_ext) {
2355                 err = bpf_program_reloc_btf_ext(prog, obj,
2356                                                 prog->section_name, 0);
2357                 if (err)
2358                         return err;
2359         }
2360
2361         if (!prog->reloc_desc)
2362                 return 0;
2363
2364         for (i = 0; i < prog->nr_reloc; i++) {
2365                 if (prog->reloc_desc[i].type == RELO_LD64 ||
2366                     prog->reloc_desc[i].type == RELO_DATA) {
2367                         bool relo_data = prog->reloc_desc[i].type == RELO_DATA;
2368                         struct bpf_insn *insns = prog->insns;
2369                         int insn_idx, map_idx;
2370
2371                         insn_idx = prog->reloc_desc[i].insn_idx;
2372                         map_idx = prog->reloc_desc[i].map_idx;
2373
2374                         if (insn_idx + 1 >= (int)prog->insns_cnt) {
2375                                 pr_warning("relocation out of range: '%s'\n",
2376                                            prog->section_name);
2377                                 return -LIBBPF_ERRNO__RELOC;
2378                         }
2379
2380                         if (!relo_data) {
2381                                 insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
2382                         } else {
2383                                 insns[insn_idx].src_reg = BPF_PSEUDO_MAP_VALUE;
2384                                 insns[insn_idx + 1].imm = insns[insn_idx].imm;
2385                         }
2386                         insns[insn_idx].imm = obj->maps[map_idx].fd;
2387                 } else if (prog->reloc_desc[i].type == RELO_CALL) {
2388                         err = bpf_program__reloc_text(prog, obj,
2389                                                       &prog->reloc_desc[i]);
2390                         if (err)
2391                                 return err;
2392                 }
2393         }
2394
2395         zfree(&prog->reloc_desc);
2396         prog->nr_reloc = 0;
2397         return 0;
2398 }
2399
2400
2401 static int
2402 bpf_object__relocate(struct bpf_object *obj)
2403 {
2404         struct bpf_program *prog;
2405         size_t i;
2406         int err;
2407
2408         for (i = 0; i < obj->nr_programs; i++) {
2409                 prog = &obj->programs[i];
2410
2411                 err = bpf_program__relocate(prog, obj);
2412                 if (err) {
2413                         pr_warning("failed to relocate '%s'\n",
2414                                    prog->section_name);
2415                         return err;
2416                 }
2417         }
2418         return 0;
2419 }
2420
2421 static int bpf_object__collect_reloc(struct bpf_object *obj)
2422 {
2423         int i, err;
2424
2425         if (!obj_elf_valid(obj)) {
2426                 pr_warning("Internal error: elf object is closed\n");
2427                 return -LIBBPF_ERRNO__INTERNAL;
2428         }
2429
2430         for (i = 0; i < obj->efile.nr_reloc; i++) {
2431                 GElf_Shdr *shdr = &obj->efile.reloc[i].shdr;
2432                 Elf_Data *data = obj->efile.reloc[i].data;
2433                 int idx = shdr->sh_info;
2434                 struct bpf_program *prog;
2435
2436                 if (shdr->sh_type != SHT_REL) {
2437                         pr_warning("internal error at %d\n", __LINE__);
2438                         return -LIBBPF_ERRNO__INTERNAL;
2439                 }
2440
2441                 prog = bpf_object__find_prog_by_idx(obj, idx);
2442                 if (!prog) {
2443                         pr_warning("relocation failed: no section(%d)\n", idx);
2444                         return -LIBBPF_ERRNO__RELOC;
2445                 }
2446
2447                 err = bpf_program__collect_reloc(prog, shdr, data, obj);
2448                 if (err)
2449                         return err;
2450         }
2451         return 0;
2452 }
2453
2454 static int
2455 load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
2456              char *license, __u32 kern_version, int *pfd)
2457 {
2458         struct bpf_load_program_attr load_attr;
2459         char *cp, errmsg[STRERR_BUFSIZE];
2460         int log_buf_size = BPF_LOG_BUF_SIZE;
2461         char *log_buf;
2462         int ret;
2463
2464         if (!insns || !insns_cnt)
2465                 return -EINVAL;
2466
2467         memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
2468         load_attr.prog_type = prog->type;
2469         load_attr.expected_attach_type = prog->expected_attach_type;
2470         if (prog->caps->name)
2471                 load_attr.name = prog->name;
2472         load_attr.insns = insns;
2473         load_attr.insns_cnt = insns_cnt;
2474         load_attr.license = license;
2475         load_attr.kern_version = kern_version;
2476         load_attr.prog_ifindex = prog->prog_ifindex;
2477         load_attr.prog_btf_fd = prog->btf_fd >= 0 ? prog->btf_fd : 0;
2478         load_attr.func_info = prog->func_info;
2479         load_attr.func_info_rec_size = prog->func_info_rec_size;
2480         load_attr.func_info_cnt = prog->func_info_cnt;
2481         load_attr.line_info = prog->line_info;
2482         load_attr.line_info_rec_size = prog->line_info_rec_size;
2483         load_attr.line_info_cnt = prog->line_info_cnt;
2484         load_attr.log_level = prog->log_level;
2485         load_attr.prog_flags = prog->prog_flags;
2486
2487 retry_load:
2488         log_buf = malloc(log_buf_size);
2489         if (!log_buf)
2490                 pr_warning("Alloc log buffer for bpf loader error, continue without log\n");
2491
2492         ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
2493
2494         if (ret >= 0) {
2495                 if (load_attr.log_level)
2496                         pr_debug("verifier log:\n%s", log_buf);
2497                 *pfd = ret;
2498                 ret = 0;
2499                 goto out;
2500         }
2501
2502         if (errno == ENOSPC) {
2503                 log_buf_size <<= 1;
2504                 free(log_buf);
2505                 goto retry_load;
2506         }
2507         ret = -LIBBPF_ERRNO__LOAD;
2508         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
2509         pr_warning("load bpf program failed: %s\n", cp);
2510
2511         if (log_buf && log_buf[0] != '\0') {
2512                 ret = -LIBBPF_ERRNO__VERIFY;
2513                 pr_warning("-- BEGIN DUMP LOG ---\n");
2514                 pr_warning("\n%s\n", log_buf);
2515                 pr_warning("-- END LOG --\n");
2516         } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
2517                 pr_warning("Program too large (%zu insns), at most %d insns\n",
2518                            load_attr.insns_cnt, BPF_MAXINSNS);
2519                 ret = -LIBBPF_ERRNO__PROG2BIG;
2520         } else {
2521                 /* Wrong program type? */
2522                 if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
2523                         int fd;
2524
2525                         load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
2526                         load_attr.expected_attach_type = 0;
2527                         fd = bpf_load_program_xattr(&load_attr, NULL, 0);
2528                         if (fd >= 0) {
2529                                 close(fd);
2530                                 ret = -LIBBPF_ERRNO__PROGTYPE;
2531                                 goto out;
2532                         }
2533                 }
2534
2535                 if (log_buf)
2536                         ret = -LIBBPF_ERRNO__KVER;
2537         }
2538
2539 out:
2540         free(log_buf);
2541         return ret;
2542 }
2543
2544 int
2545 bpf_program__load(struct bpf_program *prog,
2546                   char *license, __u32 kern_version)
2547 {
2548         int err = 0, fd, i;
2549
2550         if (prog->instances.nr < 0 || !prog->instances.fds) {
2551                 if (prog->preprocessor) {
2552                         pr_warning("Internal error: can't load program '%s'\n",
2553                                    prog->section_name);
2554                         return -LIBBPF_ERRNO__INTERNAL;
2555                 }
2556
2557                 prog->instances.fds = malloc(sizeof(int));
2558                 if (!prog->instances.fds) {
2559                         pr_warning("Not enough memory for BPF fds\n");
2560                         return -ENOMEM;
2561                 }
2562                 prog->instances.nr = 1;
2563                 prog->instances.fds[0] = -1;
2564         }
2565
2566         if (!prog->preprocessor) {
2567                 if (prog->instances.nr != 1) {
2568                         pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
2569                                    prog->section_name, prog->instances.nr);
2570                 }
2571                 err = load_program(prog, prog->insns, prog->insns_cnt,
2572                                    license, kern_version, &fd);
2573                 if (!err)
2574                         prog->instances.fds[0] = fd;
2575                 goto out;
2576         }
2577
2578         for (i = 0; i < prog->instances.nr; i++) {
2579                 struct bpf_prog_prep_result result;
2580                 bpf_program_prep_t preprocessor = prog->preprocessor;
2581
2582                 memset(&result, 0, sizeof(result));
2583                 err = preprocessor(prog, i, prog->insns,
2584                                    prog->insns_cnt, &result);
2585                 if (err) {
2586                         pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
2587                                    i, prog->section_name);
2588                         goto out;
2589                 }
2590
2591                 if (!result.new_insn_ptr || !result.new_insn_cnt) {
2592                         pr_debug("Skip loading the %dth instance of program '%s'\n",
2593                                  i, prog->section_name);
2594                         prog->instances.fds[i] = -1;
2595                         if (result.pfd)
2596                                 *result.pfd = -1;
2597                         continue;
2598                 }
2599
2600                 err = load_program(prog, result.new_insn_ptr,
2601                                    result.new_insn_cnt,
2602                                    license, kern_version, &fd);
2603
2604                 if (err) {
2605                         pr_warning("Loading the %dth instance of program '%s' failed\n",
2606                                         i, prog->section_name);
2607                         goto out;
2608                 }
2609
2610                 if (result.pfd)
2611                         *result.pfd = fd;
2612                 prog->instances.fds[i] = fd;
2613         }
2614 out:
2615         if (err)
2616                 pr_warning("failed to load program '%s'\n",
2617                            prog->section_name);
2618         zfree(&prog->insns);
2619         prog->insns_cnt = 0;
2620         return err;
2621 }
2622
2623 static bool bpf_program__is_function_storage(const struct bpf_program *prog,
2624                                              const struct bpf_object *obj)
2625 {
2626         return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
2627 }
2628
2629 static int
2630 bpf_object__load_progs(struct bpf_object *obj, int log_level)
2631 {
2632         size_t i;
2633         int err;
2634
2635         for (i = 0; i < obj->nr_programs; i++) {
2636                 if (bpf_program__is_function_storage(&obj->programs[i], obj))
2637                         continue;
2638                 obj->programs[i].log_level |= log_level;
2639                 err = bpf_program__load(&obj->programs[i],
2640                                         obj->license,
2641                                         obj->kern_version);
2642                 if (err)
2643                         return err;
2644         }
2645         return 0;
2646 }
2647
2648 static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
2649 {
2650         switch (type) {
2651         case BPF_PROG_TYPE_SOCKET_FILTER:
2652         case BPF_PROG_TYPE_SCHED_CLS:
2653         case BPF_PROG_TYPE_SCHED_ACT:
2654         case BPF_PROG_TYPE_XDP:
2655         case BPF_PROG_TYPE_CGROUP_SKB:
2656         case BPF_PROG_TYPE_CGROUP_SOCK:
2657         case BPF_PROG_TYPE_LWT_IN:
2658         case BPF_PROG_TYPE_LWT_OUT:
2659         case BPF_PROG_TYPE_LWT_XMIT:
2660         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2661         case BPF_PROG_TYPE_SOCK_OPS:
2662         case BPF_PROG_TYPE_SK_SKB:
2663         case BPF_PROG_TYPE_CGROUP_DEVICE:
2664         case BPF_PROG_TYPE_SK_MSG:
2665         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
2666         case BPF_PROG_TYPE_LIRC_MODE2:
2667         case BPF_PROG_TYPE_SK_REUSEPORT:
2668         case BPF_PROG_TYPE_FLOW_DISSECTOR:
2669         case BPF_PROG_TYPE_UNSPEC:
2670         case BPF_PROG_TYPE_TRACEPOINT:
2671         case BPF_PROG_TYPE_RAW_TRACEPOINT:
2672         case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
2673         case BPF_PROG_TYPE_PERF_EVENT:
2674         case BPF_PROG_TYPE_CGROUP_SYSCTL:
2675         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2676                 return false;
2677         case BPF_PROG_TYPE_KPROBE:
2678         default:
2679                 return true;
2680         }
2681 }
2682
2683 static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
2684 {
2685         if (needs_kver && obj->kern_version == 0) {
2686                 pr_warning("%s doesn't provide kernel version\n",
2687                            obj->path);
2688                 return -LIBBPF_ERRNO__KVERSION;
2689         }
2690         return 0;
2691 }
2692
/*
 * Common implementation behind the bpf_object__open*() entry points.
 *
 * @path:       name/path handed to bpf_object__new()
 * @obj_buf:    in-memory ELF image, or NULL (as passed by the file-based
 *              caller)
 * @obj_buf_sz: size of @obj_buf
 * @needs_kver: whether bpf_object__validate() must insist on a kernel
 *              version
 * @flags:      forwarded to bpf_object__elf_collect()
 *
 * Returns the new object on success. On failure returns an ERR_PTR()
 * encoded error; if the object had already been created it is closed
 * first.
 */
static struct bpf_object *
__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
                   bool needs_kver, int flags)
{
        struct bpf_object *obj;
        int err;

        /* libelf must be told which ELF version to use before anything else */
        if (elf_version(EV_CURRENT) == EV_NONE) {
                pr_warning("failed to init libelf for %s\n", path);
                return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
        }

        obj = bpf_object__new(path, obj_buf, obj_buf_sz);
        if (IS_ERR(obj))
                return obj;

        /*
         * NOTE(review): CHECK_ERR is defined elsewhere in this file;
         * presumably it stores the call's result in err and jumps to
         * the given label when it is non-zero -- confirm.
         */
        CHECK_ERR(bpf_object__elf_init(obj), err, out);
        CHECK_ERR(bpf_object__check_endianness(obj), err, out);
        CHECK_ERR(bpf_object__probe_caps(obj), err, out);
        CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out);
        CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
        CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);

        /* everything needed has been extracted; drop the ELF state */
        bpf_object__elf_finish(obj);
        return obj;
out:
        bpf_object__close(obj);
        return ERR_PTR(err);
}
2722
2723 struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr,
2724                                             int flags)
2725 {
2726         /* param validation */
2727         if (!attr->file)
2728                 return NULL;
2729
2730         pr_debug("loading %s\n", attr->file);
2731
2732         return __bpf_object__open(attr->file, NULL, 0,
2733                                   bpf_prog_type__needs_kver(attr->prog_type),
2734                                   flags);
2735 }
2736
/* Open the object described by @attr with no extra flags. */
struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
{
        const int no_flags = 0;

        return __bpf_object__open_xattr(attr, no_flags);
}
2741
2742 struct bpf_object *bpf_object__open(const char *path)
2743 {
2744         struct bpf_object_open_attr attr = {
2745                 .file           = path,
2746                 .prog_type      = BPF_PROG_TYPE_UNSPEC,
2747         };
2748
2749         return bpf_object__open_xattr(&attr);
2750 }
2751
/*
 * Open an object from the in-memory ELF image @obj_buf of @obj_buf_sz
 * bytes. When @name is NULL a name is synthesised from the buffer address
 * and size.
 *
 * Returns NULL for a NULL or empty buffer, otherwise whatever
 * __bpf_object__open() returns.
 */
struct bpf_object *bpf_object__open_buffer(void *obj_buf,
                                           size_t obj_buf_sz,
                                           const char *name)
{
        char generated[64];
        const char *label = name;

        /* param validation */
        if (obj_buf == NULL || obj_buf_sz == 0)
                return NULL;

        if (label == NULL) {
                snprintf(generated, sizeof(generated), "%lx-%lx",
                         (unsigned long)obj_buf,
                         (unsigned long)obj_buf_sz);
                label = generated;
        }
        pr_debug("loading object '%s' from buffer\n", label);

        return __bpf_object__open(label, obj_buf, obj_buf_sz, true, true);
}
2772
2773 int bpf_object__unload(struct bpf_object *obj)
2774 {
2775         size_t i;
2776
2777         if (!obj)
2778                 return -EINVAL;
2779
2780         for (i = 0; i < obj->nr_maps; i++)
2781                 zclose(obj->maps[i].fd);
2782
2783         for (i = 0; i < obj->nr_programs; i++)
2784                 bpf_program__unload(&obj->programs[i]);
2785
2786         return 0;
2787 }
2788
/*
 * Load the object referenced by attr->obj into the kernel: create its
 * maps, relocate its programs, then load them with attr->log_level.
 *
 * Returns 0 on success. Returns -EINVAL when @attr or attr->obj is NULL or
 * the object was already loaded. On any later failure the object is
 * unloaded again and the failing step's error code is returned.
 */
int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
{
        struct bpf_object *obj;
        int err;

        if (!attr)
                return -EINVAL;
        obj = attr->obj;
        if (!obj)
                return -EINVAL;

        if (obj->loaded) {
                pr_warning("object should not be loaded twice\n");
                return -EINVAL;
        }

        /* set before loading and not cleared on the failure path below */
        obj->loaded = true;

        /*
         * NOTE(review): CHECK_ERR is defined elsewhere in this file;
         * presumably it stores the call's result in err and jumps to
         * the given label when it is non-zero -- confirm.
         */
        CHECK_ERR(bpf_object__create_maps(obj), err, out);
        CHECK_ERR(bpf_object__relocate(obj), err, out);
        CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out);

        return 0;
out:
        bpf_object__unload(obj);
        pr_warning("failed to load object '%s'\n", obj->path);
        return err;
}
2817
2818 int bpf_object__load(struct bpf_object *obj)
2819 {
2820         struct bpf_object_load_attr attr = {
2821                 .obj = obj,
2822         };
2823
2824         return bpf_object__load_xattr(&attr);
2825 }
2826
2827 static int check_path(const char *path)
2828 {
2829         char *cp, errmsg[STRERR_BUFSIZE];
2830         struct statfs st_fs;
2831         char *dname, *dir;
2832         int err = 0;
2833
2834         if (path == NULL)
2835                 return -EINVAL;
2836
2837         dname = strdup(path);
2838         if (dname == NULL)
2839                 return -ENOMEM;
2840
2841         dir = dirname(dname);
2842         if (statfs(dir, &st_fs)) {
2843                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
2844                 pr_warning("failed to statfs %s: %s\n", dir, cp);
2845                 err = -errno;
2846         }
2847         free(dname);
2848
2849         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
2850                 pr_warning("specified path %s is not on BPF FS\n", path);
2851                 err = -EINVAL;
2852         }
2853
2854         return err;
2855 }
2856
2857 int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
2858                               int instance)
2859 {
2860         char *cp, errmsg[STRERR_BUFSIZE];
2861         int err;
2862
2863         err = check_path(path);
2864         if (err)
2865                 return err;
2866
2867         if (prog == NULL) {
2868                 pr_warning("invalid program pointer\n");
2869                 return -EINVAL;
2870         }
2871
2872         if (instance < 0 || instance >= prog->instances.nr) {
2873                 pr_warning("invalid prog instance %d of prog %s (max %d)\n",
2874                            instance, prog->section_name, prog->instances.nr);
2875                 return -EINVAL;
2876         }
2877
2878         if (bpf_obj_pin(prog->instances.fds[instance], path)) {
2879                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
2880                 pr_warning("failed to pin program: %s\n", cp);
2881                 return -errno;
2882         }
2883         pr_debug("pinned program '%s'\n", path);
2884
2885         return 0;
2886 }
2887
2888 int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,
2889                                 int instance)
2890 {
2891         int err;
2892
2893         err = check_path(path);
2894         if (err)
2895                 return err;
2896
2897         if (prog == NULL) {
2898                 pr_warning("invalid program pointer\n");
2899                 return -EINVAL;
2900         }
2901
2902         if (instance < 0 || instance >= prog->instances.nr) {
2903                 pr_warning("invalid prog instance %d of prog %s (max %d)\n",
2904                            instance, prog->section_name, prog->instances.nr);
2905                 return -EINVAL;
2906         }
2907
2908         err = unlink(path);
2909         if (err != 0)
2910                 return -errno;
2911         pr_debug("unpinned program '%s'\n", path);
2912
2913         return 0;
2914 }
2915
2916 static int make_dir(const char *path)
2917 {
2918         char *cp, errmsg[STRERR_BUFSIZE];
2919         int err = 0;
2920
2921         if (mkdir(path, 0700) && errno != EEXIST)
2922                 err = -errno;
2923
2924         if (err) {
2925                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
2926                 pr_warning("failed to mkdir %s: %s\n", path, cp);
2927         }
2928         return err;
2929 }
2930
2931 int bpf_program__pin(struct bpf_program *prog, const char *path)
2932 {
2933         int i, err;
2934
2935         err = check_path(path);
2936         if (err)
2937                 return err;
2938
2939         if (prog == NULL) {
2940                 pr_warning("invalid program pointer\n");
2941                 return -EINVAL;
2942         }
2943
2944         if (prog->instances.nr <= 0) {
2945                 pr_warning("no instances of prog %s to pin\n",
2946                            prog->section_name);
2947                 return -EINVAL;
2948         }
2949
2950         if (prog->instances.nr == 1) {
2951                 /* don't create subdirs when pinning single instance */
2952                 return bpf_program__pin_instance(prog, path, 0);
2953         }
2954
2955         err = make_dir(path);
2956         if (err)
2957                 return err;
2958
2959         for (i = 0; i < prog->instances.nr; i++) {
2960                 char buf[PATH_MAX];
2961                 int len;
2962
2963                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
2964                 if (len < 0) {
2965                         err = -EINVAL;
2966                         goto err_unpin;
2967                 } else if (len >= PATH_MAX) {
2968                         err = -ENAMETOOLONG;
2969                         goto err_unpin;
2970                 }
2971
2972                 err = bpf_program__pin_instance(prog, buf, i);
2973                 if (err)
2974                         goto err_unpin;
2975         }
2976
2977         return 0;
2978
2979 err_unpin:
2980         for (i = i - 1; i >= 0; i--) {
2981                 char buf[PATH_MAX];
2982                 int len;
2983
2984                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
2985                 if (len < 0)
2986                         continue;
2987                 else if (len >= PATH_MAX)
2988                         continue;
2989
2990                 bpf_program__unpin_instance(prog, buf, i);
2991         }
2992
2993         rmdir(path);
2994
2995         return err;
2996 }
2997
2998 int bpf_program__unpin(struct bpf_program *prog, const char *path)
2999 {
3000         int i, err;
3001
3002         err = check_path(path);
3003         if (err)
3004                 return err;
3005
3006         if (prog == NULL) {
3007                 pr_warning("invalid program pointer\n");
3008                 return -EINVAL;
3009         }
3010
3011         if (prog->instances.nr <= 0) {
3012                 pr_warning("no instances of prog %s to pin\n",
3013                            prog->section_name);
3014                 return -EINVAL;
3015         }
3016
3017         if (prog->instances.nr == 1) {
3018                 /* don't create subdirs when pinning single instance */
3019                 return bpf_program__unpin_instance(prog, path, 0);
3020         }
3021
3022         for (i = 0; i < prog->instances.nr; i++) {
3023                 char buf[PATH_MAX];
3024                 int len;
3025
3026                 len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
3027                 if (len < 0)
3028                         return -EINVAL;
3029                 else if (len >= PATH_MAX)
3030                         return -ENAMETOOLONG;
3031
3032                 err = bpf_program__unpin_instance(prog, buf, i);
3033                 if (err)
3034                         return err;
3035         }
3036
3037         err = rmdir(path);
3038         if (err)
3039                 return -errno;
3040
3041         return 0;
3042 }
3043
3044 int bpf_map__pin(struct bpf_map *map, const char *path)
3045 {
3046         char *cp, errmsg[STRERR_BUFSIZE];
3047         int err;
3048
3049         err = check_path(path);
3050         if (err)
3051                 return err;
3052
3053         if (map == NULL) {
3054                 pr_warning("invalid map pointer\n");
3055                 return -EINVAL;
3056         }
3057
3058         if (bpf_obj_pin(map->fd, path)) {
3059                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
3060                 pr_warning("failed to pin map: %s\n", cp);
3061                 return -errno;
3062         }
3063
3064         pr_debug("pinned map '%s'\n", path);
3065
3066         return 0;
3067 }
3068
/*
 * Remove the pin of @map located at @path.
 *
 * Returns 0 on success, -EINVAL for a NULL map, the error from
 * check_path(), or the negated errno of a failing unlink().
 */
int bpf_map__unpin(struct bpf_map *map, const char *path)
{
        int rc = check_path(path);

        if (rc)
                return rc;

        if (map == NULL) {
                pr_warning("invalid map pointer\n");
                return -EINVAL;
        }

        if (unlink(path) != 0)
                return -errno;

        pr_debug("unpinned map '%s'\n", path);
        return 0;
}
3089
3090 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
3091 {
3092         struct bpf_map *map;
3093         int err;
3094
3095         if (!obj)
3096                 return -ENOENT;
3097
3098         if (!obj->loaded) {
3099                 pr_warning("object not yet loaded; load it first\n");
3100                 return -ENOENT;
3101         }
3102
3103         err = make_dir(path);
3104         if (err)
3105                 return err;
3106
3107         bpf_object__for_each_map(map, obj) {
3108                 char buf[PATH_MAX];
3109                 int len;
3110
3111                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
3112                                bpf_map__name(map));
3113                 if (len < 0) {
3114                         err = -EINVAL;
3115                         goto err_unpin_maps;
3116                 } else if (len >= PATH_MAX) {
3117                         err = -ENAMETOOLONG;
3118                         goto err_unpin_maps;
3119                 }
3120
3121                 err = bpf_map__pin(map, buf);
3122                 if (err)
3123                         goto err_unpin_maps;
3124         }
3125
3126         return 0;
3127
3128 err_unpin_maps:
3129         while ((map = bpf_map__prev(map, obj))) {
3130                 char buf[PATH_MAX];
3131                 int len;
3132
3133                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
3134                                bpf_map__name(map));
3135                 if (len < 0)
3136                         continue;
3137                 else if (len >= PATH_MAX)
3138                         continue;
3139
3140                 bpf_map__unpin(map, buf);
3141         }
3142
3143         return err;
3144 }
3145
3146 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
3147 {
3148         struct bpf_map *map;
3149         int err;
3150
3151         if (!obj)
3152                 return -ENOENT;
3153
3154         bpf_object__for_each_map(map, obj) {
3155                 char buf[PATH_MAX];
3156                 int len;
3157
3158                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
3159                                bpf_map__name(map));
3160                 if (len < 0)
3161                         return -EINVAL;
3162                 else if (len >= PATH_MAX)
3163                         return -ENAMETOOLONG;
3164
3165                 err = bpf_map__unpin(map, buf);
3166                 if (err)
3167                         return err;
3168         }
3169
3170         return 0;
3171 }
3172
3173 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
3174 {
3175         struct bpf_program *prog;
3176         int err;
3177
3178         if (!obj)
3179                 return -ENOENT;
3180
3181         if (!obj->loaded) {
3182                 pr_warning("object not yet loaded; load it first\n");
3183                 return -ENOENT;
3184         }
3185
3186         err = make_dir(path);
3187         if (err)
3188                 return err;
3189
3190         bpf_object__for_each_program(prog, obj) {
3191                 char buf[PATH_MAX];
3192                 int len;
3193
3194                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
3195                                prog->pin_name);
3196                 if (len < 0) {
3197                         err = -EINVAL;
3198                         goto err_unpin_programs;
3199                 } else if (len >= PATH_MAX) {
3200                         err = -ENAMETOOLONG;
3201                         goto err_unpin_programs;
3202                 }
3203
3204                 err = bpf_program__pin(prog, buf);
3205                 if (err)
3206                         goto err_unpin_programs;
3207         }
3208
3209         return 0;
3210
3211 err_unpin_programs:
3212         while ((prog = bpf_program__prev(prog, obj))) {
3213                 char buf[PATH_MAX];
3214                 int len;
3215
3216                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
3217                                prog->pin_name);
3218                 if (len < 0)
3219                         continue;
3220                 else if (len >= PATH_MAX)
3221                         continue;
3222
3223                 bpf_program__unpin(prog, buf);
3224         }
3225
3226         return err;
3227 }
3228
3229 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
3230 {
3231         struct bpf_program *prog;
3232         int err;
3233
3234         if (!obj)
3235                 return -ENOENT;
3236
3237         bpf_object__for_each_program(prog, obj) {
3238                 char buf[PATH_MAX];
3239                 int len;
3240
3241                 len = snprintf(buf, PATH_MAX, "%s/%s", path,
3242                                prog->pin_name);
3243                 if (len < 0)
3244                         return -EINVAL;
3245                 else if (len >= PATH_MAX)
3246                         return -ENAMETOOLONG;
3247
3248                 err = bpf_program__unpin(prog, buf);
3249                 if (err)
3250                         return err;
3251         }
3252
3253         return 0;
3254 }
3255
/*
 * Pin all maps and then all programs of @obj under @path. If pinning the
 * programs fails, the maps are unpinned again.
 *
 * Returns 0 on success or the error of the step that failed.
 */
int bpf_object__pin(struct bpf_object *obj, const char *path)
{
        int rc = bpf_object__pin_maps(obj, path);

        if (!rc) {
                rc = bpf_object__pin_programs(obj, path);
                if (rc)
                        bpf_object__unpin_maps(obj, path);
        }

        return rc;
}
3272
3273 void bpf_object__close(struct bpf_object *obj)
3274 {
3275         size_t i;
3276
3277         if (!obj)
3278                 return;
3279
3280         if (obj->clear_priv)
3281                 obj->clear_priv(obj, obj->priv);
3282
3283         bpf_object__elf_finish(obj);
3284         bpf_object__unload(obj);
3285         btf__free(obj->btf);
3286         btf_ext__free(obj->btf_ext);
3287
3288         for (i = 0; i < obj->nr_maps; i++) {
3289                 zfree(&obj->maps[i].name);
3290                 if (obj->maps[i].clear_priv)
3291                         obj->maps[i].clear_priv(&obj->maps[i],
3292                                                 obj->maps[i].priv);
3293                 obj->maps[i].priv = NULL;
3294                 obj->maps[i].clear_priv = NULL;
3295         }
3296
3297         zfree(&obj->sections.rodata);
3298         zfree(&obj->sections.data);
3299         zfree(&obj->maps);
3300         obj->nr_maps = 0;
3301
3302         if (obj->programs && obj->nr_programs) {
3303                 for (i = 0; i < obj->nr_programs; i++)
3304                         bpf_program__exit(&obj->programs[i]);
3305         }
3306         zfree(&obj->programs);
3307
3308         list_del(&obj->list);
3309         free(obj);
3310 }
3311
3312 struct bpf_object *
3313 bpf_object__next(struct bpf_object *prev)
3314 {
3315         struct bpf_object *next;
3316
3317         if (!prev)
3318                 next = list_first_entry(&bpf_objects_list,
3319                                         struct bpf_object,
3320                                         list);
3321         else
3322                 next = list_next_entry(prev, list);
3323
3324         /* Empty list is noticed here so don't need checking on entry. */
3325         if (&next->list == &bpf_objects_list)
3326                 return NULL;
3327
3328         return next;
3329 }
3330
3331 const char *bpf_object__name(const struct bpf_object *obj)
3332 {
3333         return obj ? obj->path : ERR_PTR(-EINVAL);
3334 }
3335
3336 unsigned int bpf_object__kversion(const struct bpf_object *obj)
3337 {
3338         return obj ? obj->kern_version : 0;
3339 }
3340
3341 struct btf *bpf_object__btf(const struct bpf_object *obj)
3342 {
3343         return obj ? obj->btf : NULL;
3344 }
3345
3346 int bpf_object__btf_fd(const struct bpf_object *obj)
3347 {
3348         return obj->btf ? btf__fd(obj->btf) : -1;
3349 }
3350
3351 int bpf_object__set_priv(struct bpf_object *obj, void *priv,
3352                          bpf_object_clear_priv_t clear_priv)
3353 {
3354         if (obj->priv && obj->clear_priv)
3355                 obj->clear_priv(obj, obj->priv);
3356
3357         obj->priv = priv;
3358         obj->clear_priv = clear_priv;
3359         return 0;
3360 }
3361
3362 void *bpf_object__priv(const struct bpf_object *obj)
3363 {
3364         return obj ? obj->priv : ERR_PTR(-EINVAL);
3365 }
3366
3367 static struct bpf_program *
3368 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
3369                     bool forward)
3370 {
3371         size_t nr_programs = obj->nr_programs;
3372         ssize_t idx;
3373
3374         if (!nr_programs)
3375                 return NULL;
3376
3377         if (!p)
3378                 /* Iter from the beginning */
3379                 return forward ? &obj->programs[0] :
3380                         &obj->programs[nr_programs - 1];
3381
3382         if (p->obj != obj) {
3383                 pr_warning("error: program handler doesn't match object\n");
3384                 return NULL;
3385         }
3386
3387         idx = (p - obj->programs) + (forward ? 1 : -1);
3388         if (idx >= obj->nr_programs || idx < 0)
3389                 return NULL;
3390         return &obj->programs[idx];
3391 }
3392
/*
 * Return the program following @prev in @obj (the first one when @prev is
 * NULL), skipping every entry for which
 * bpf_program__is_function_storage() is true. Returns NULL at the end.
 */
struct bpf_program *
bpf_program__next(struct bpf_program *prev, const struct bpf_object *obj)
{
	struct bpf_program *cur = prev;

	for (;;) {
		cur = __bpf_program__iter(cur, obj, true);
		if (!cur || !bpf_program__is_function_storage(cur, obj))
			return cur;
	}
}
3404
/*
 * Return the program preceding @next in @obj (the last one when @next is
 * NULL), skipping every entry for which
 * bpf_program__is_function_storage() is true. Returns NULL at the start.
 */
struct bpf_program *
bpf_program__prev(struct bpf_program *next, const struct bpf_object *obj)
{
	struct bpf_program *cur = next;

	for (;;) {
		cur = __bpf_program__iter(cur, obj, false);
		if (!cur || !bpf_program__is_function_storage(cur, obj))
			return cur;
	}
}
3416
3417 int bpf_program__set_priv(struct bpf_program *prog, void *priv,
3418                           bpf_program_clear_priv_t clear_priv)
3419 {
3420         if (prog->priv && prog->clear_priv)
3421                 prog->clear_priv(prog, prog->priv);
3422
3423         prog->priv = priv;
3424         prog->clear_priv = clear_priv;
3425         return 0;
3426 }
3427
3428 void *bpf_program__priv(const struct bpf_program *prog)
3429 {
3430         return prog ? prog->priv : ERR_PTR(-EINVAL);
3431 }
3432
/*
 * Store @ifindex in prog->prog_ifindex.
 * @prog is dereferenced unconditionally and must not be NULL.
 */
void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
{
	prog->prog_ifindex = ifindex;
}
3437
3438 const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
3439 {
3440         const char *title;
3441
3442         title = prog->section_name;
3443         if (needs_copy) {
3444                 title = strdup(title);
3445                 if (!title) {
3446                         pr_warning("failed to strdup program title\n");
3447                         return ERR_PTR(-ENOMEM);
3448                 }
3449         }
3450
3451         return title;
3452 }
3453
/*
 * Return the FD of instance 0 of @prog; this is exactly
 * bpf_program__nth_fd(prog, 0), including its error returns.
 */
int bpf_program__fd(const struct bpf_program *prog)
{
	return bpf_program__nth_fd(prog, 0);
}
3458
3459 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
3460                           bpf_program_prep_t prep)
3461 {
3462         int *instances_fds;
3463
3464         if (nr_instances <= 0 || !prep)
3465                 return -EINVAL;
3466
3467         if (prog->instances.nr > 0 || prog->instances.fds) {
3468                 pr_warning("Can't set pre-processor after loading\n");
3469                 return -EINVAL;
3470         }
3471
3472         instances_fds = malloc(sizeof(int) * nr_instances);
3473         if (!instances_fds) {
3474                 pr_warning("alloc memory failed for fds\n");
3475                 return -ENOMEM;
3476         }
3477
3478         /* fill all fd with -1 */
3479         memset(instances_fds, -1, sizeof(int) * nr_instances);
3480
3481         prog->instances.nr = nr_instances;
3482         prog->instances.fds = instances_fds;
3483         prog->preprocessor = prep;
3484         return 0;
3485 }
3486
3487 int bpf_program__nth_fd(const struct bpf_program *prog, int n)
3488 {
3489         int fd;
3490
3491         if (!prog)
3492                 return -EINVAL;
3493
3494         if (n >= prog->instances.nr || n < 0) {
3495                 pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
3496                            n, prog->section_name, prog->instances.nr);
3497                 return -EINVAL;
3498         }
3499
3500         fd = prog->instances.fds[n];
3501         if (fd < 0) {
3502                 pr_warning("%dth instance of program '%s' is invalid\n",
3503                            n, prog->section_name);
3504                 return -ENOENT;
3505         }
3506
3507         return fd;
3508 }
3509
/*
 * Store @type in prog->type.
 * @prog is dereferenced unconditionally and must not be NULL.
 */
void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
{
	prog->type = type;
}
3514
3515 static bool bpf_program__is_type(const struct bpf_program *prog,
3516                                  enum bpf_prog_type type)
3517 {
3518         return prog ? (prog->type == type) : false;
3519 }
3520
3521 #define BPF_PROG_TYPE_FNS(NAME, TYPE)                           \
3522 int bpf_program__set_##NAME(struct bpf_program *prog)           \
3523 {                                                               \
3524         if (!prog)                                              \
3525                 return -EINVAL;                                 \
3526         bpf_program__set_type(prog, TYPE);                      \
3527         return 0;                                               \
3528 }                                                               \
3529                                                                 \
3530 bool bpf_program__is_##NAME(const struct bpf_program *prog)     \
3531 {                                                               \
3532         return bpf_program__is_type(prog, TYPE);                \
3533 }                                                               \
3534
3535 BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
3536 BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
3537 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
3538 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
3539 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
3540 BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
3541 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
3542 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
3543
/*
 * Store @type in prog->expected_attach_type.
 * @prog is dereferenced unconditionally and must not be NULL.
 */
void bpf_program__set_expected_attach_type(struct bpf_program *prog,
					   enum bpf_attach_type type)
{
	prog->expected_attach_type = type;
}
3549
3550 #define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \
3551         { string, sizeof(string) - 1, ptype, eatype, is_attachable, atype }
3552
3553 /* Programs that can NOT be attached. */
3554 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0)
3555
3556 /* Programs that can be attached. */
3557 #define BPF_APROG_SEC(string, ptype, atype) \
3558         BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype)
3559
3560 /* Programs that must specify expected attach type at load time. */
3561 #define BPF_EAPROG_SEC(string, ptype, eatype) \
3562         BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype)
3563
3564 /* Programs that can be attached but attach type can't be identified by section
3565  * name. Kept for backward compatibility.
3566  */
3567 #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype)
3568
3569 static const struct {
3570         const char *sec;
3571         size_t len;
3572         enum bpf_prog_type prog_type;
3573         enum bpf_attach_type expected_attach_type;
3574         int is_attachable;
3575         enum bpf_attach_type attach_type;
3576 } section_names[] = {
3577         BPF_PROG_SEC("socket",                  BPF_PROG_TYPE_SOCKET_FILTER),
3578         BPF_PROG_SEC("kprobe/",                 BPF_PROG_TYPE_KPROBE),
3579         BPF_PROG_SEC("kretprobe/",              BPF_PROG_TYPE_KPROBE),
3580         BPF_PROG_SEC("classifier",              BPF_PROG_TYPE_SCHED_CLS),
3581         BPF_PROG_SEC("action",                  BPF_PROG_TYPE_SCHED_ACT),
3582         BPF_PROG_SEC("tracepoint/",             BPF_PROG_TYPE_TRACEPOINT),
3583         BPF_PROG_SEC("raw_tracepoint/",         BPF_PROG_TYPE_RAW_TRACEPOINT),
3584         BPF_PROG_SEC("xdp",                     BPF_PROG_TYPE_XDP),
3585         BPF_PROG_SEC("perf_event",              BPF_PROG_TYPE_PERF_EVENT),
3586         BPF_PROG_SEC("lwt_in",                  BPF_PROG_TYPE_LWT_IN),
3587         BPF_PROG_SEC("lwt_out",                 BPF_PROG_TYPE_LWT_OUT),
3588         BPF_PROG_SEC("lwt_xmit",                BPF_PROG_TYPE_LWT_XMIT),
3589         BPF_PROG_SEC("lwt_seg6local",           BPF_PROG_TYPE_LWT_SEG6LOCAL),
3590         BPF_APROG_SEC("cgroup_skb/ingress",     BPF_PROG_TYPE_CGROUP_SKB,
3591                                                 BPF_CGROUP_INET_INGRESS),
3592         BPF_APROG_SEC("cgroup_skb/egress",      BPF_PROG_TYPE_CGROUP_SKB,
3593                                                 BPF_CGROUP_INET_EGRESS),
3594         BPF_APROG_COMPAT("cgroup/skb",          BPF_PROG_TYPE_CGROUP_SKB),
3595         BPF_APROG_SEC("cgroup/sock",            BPF_PROG_TYPE_CGROUP_SOCK,
3596                                                 BPF_CGROUP_INET_SOCK_CREATE),
3597         BPF_EAPROG_SEC("cgroup/post_bind4",     BPF_PROG_TYPE_CGROUP_SOCK,
3598                                                 BPF_CGROUP_INET4_POST_BIND),
3599         BPF_EAPROG_SEC("cgroup/post_bind6",     BPF_PROG_TYPE_CGROUP_SOCK,
3600                                                 BPF_CGROUP_INET6_POST_BIND),
3601         BPF_APROG_SEC("cgroup/dev",             BPF_PROG_TYPE_CGROUP_DEVICE,
3602                                                 BPF_CGROUP_DEVICE),
3603         BPF_APROG_SEC("sockops",                BPF_PROG_TYPE_SOCK_OPS,
3604                                                 BPF_CGROUP_SOCK_OPS),
3605         BPF_APROG_SEC("sk_skb/stream_parser",   BPF_PROG_TYPE_SK_SKB,
3606                                                 BPF_SK_SKB_STREAM_PARSER),
3607         BPF_APROG_SEC("sk_skb/stream_verdict",  BPF_PROG_TYPE_SK_SKB,
3608                                                 BPF_SK_SKB_STREAM_VERDICT),
3609         BPF_APROG_COMPAT("sk_skb",              BPF_PROG_TYPE_SK_SKB),
3610         BPF_APROG_SEC("sk_msg",                 BPF_PROG_TYPE_SK_MSG,
3611                                                 BPF_SK_MSG_VERDICT),
3612         BPF_APROG_SEC("lirc_mode2",             BPF_PROG_TYPE_LIRC_MODE2,
3613                                                 BPF_LIRC_MODE2),
3614         BPF_APROG_SEC("flow_dissector",         BPF_PROG_TYPE_FLOW_DISSECTOR,
3615                                                 BPF_FLOW_DISSECTOR),
3616         BPF_EAPROG_SEC("cgroup/bind4",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
3617                                                 BPF_CGROUP_INET4_BIND),
3618         BPF_EAPROG_SEC("cgroup/bind6",          BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
3619                                                 BPF_CGROUP_INET6_BIND),
3620         BPF_EAPROG_SEC("cgroup/connect4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
3621                                                 BPF_CGROUP_INET4_CONNECT),
3622         BPF_EAPROG_SEC("cgroup/connect6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
3623                                                 BPF_CGROUP_INET6_CONNECT),
3624         BPF_EAPROG_SEC("cgroup/sendmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
3625                                                 BPF_CGROUP_UDP4_SENDMSG),
3626         BPF_EAPROG_SEC("cgroup/sendmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
3627                                                 BPF_CGROUP_UDP6_SENDMSG),
3628         BPF_EAPROG_SEC("cgroup/recvmsg4",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
3629                                                 BPF_CGROUP_UDP4_RECVMSG),
3630         BPF_EAPROG_SEC("cgroup/recvmsg6",       BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
3631                                                 BPF_CGROUP_UDP6_RECVMSG),
3632         BPF_EAPROG_SEC("cgroup/sysctl",         BPF_PROG_TYPE_CGROUP_SYSCTL,
3633                                                 BPF_CGROUP_SYSCTL),
3634         BPF_EAPROG_SEC("cgroup/getsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
3635                                                 BPF_CGROUP_GETSOCKOPT),
3636         BPF_EAPROG_SEC("cgroup/setsockopt",     BPF_PROG_TYPE_CGROUP_SOCKOPT,
3637                                                 BPF_CGROUP_SETSOCKOPT),
3638 };
3639
3640 #undef BPF_PROG_SEC_IMPL
3641 #undef BPF_PROG_SEC
3642 #undef BPF_APROG_SEC
3643 #undef BPF_EAPROG_SEC
3644 #undef BPF_APROG_COMPAT
3645
3646 #define MAX_TYPE_NAME_SIZE 32
3647
3648 static char *libbpf_get_type_names(bool attach_type)
3649 {
3650         int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE;
3651         char *buf;
3652
3653         buf = malloc(len);
3654         if (!buf)
3655                 return NULL;
3656
3657         buf[0] = '\0';
3658         /* Forge string buf with all available names */
3659         for (i = 0; i < ARRAY_SIZE(section_names); i++) {
3660                 if (attach_type && !section_names[i].is_attachable)
3661                         continue;
3662
3663                 if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) {
3664                         free(buf);
3665                         return NULL;
3666                 }
3667                 strcat(buf, " ");
3668                 strcat(buf, section_names[i].sec);
3669         }
3670
3671         return buf;
3672 }
3673
3674 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
3675                              enum bpf_attach_type *expected_attach_type)
3676 {
3677         char *type_names;
3678         int i;
3679
3680         if (!name)
3681                 return -EINVAL;
3682
3683         for (i = 0; i < ARRAY_SIZE(section_names); i++) {
3684                 if (strncmp(name, section_names[i].sec, section_names[i].len))
3685                         continue;
3686                 *prog_type = section_names[i].prog_type;
3687                 *expected_attach_type = section_names[i].expected_attach_type;
3688                 return 0;
3689         }
3690         pr_warning("failed to guess program type based on ELF section name '%s'\n", name);
3691         type_names = libbpf_get_type_names(false);
3692         if (type_names != NULL) {
3693                 pr_info("supported section(type) names are:%s\n", type_names);
3694                 free(type_names);
3695         }
3696
3697         return -EINVAL;
3698 }
3699
3700 int libbpf_attach_type_by_name(const char *name,
3701                                enum bpf_attach_type *attach_type)
3702 {
3703         char *type_names;
3704         int i;
3705
3706         if (!name)
3707                 return -EINVAL;
3708
3709         for (i = 0; i < ARRAY_SIZE(section_names); i++) {
3710                 if (strncmp(name, section_names[i].sec, section_names[i].len))
3711                         continue;
3712                 if (!section_names[i].is_attachable)
3713                         return -EINVAL;
3714                 *attach_type = section_names[i].attach_type;
3715                 return 0;
3716         }
3717         pr_warning("failed to guess attach type based on ELF section name '%s'\n", name);
3718         type_names = libbpf_get_type_names(true);
3719         if (type_names != NULL) {
3720                 pr_info("attachable section(type) names are:%s\n", type_names);
3721                 free(type_names);
3722         }
3723
3724         return -EINVAL;
3725 }
3726
3727 static int
3728 bpf_program__identify_section(struct bpf_program *prog,
3729                               enum bpf_prog_type *prog_type,
3730                               enum bpf_attach_type *expected_attach_type)
3731 {
3732         return libbpf_prog_type_by_name(prog->section_name, prog_type,
3733                                         expected_attach_type);
3734 }
3735
3736 int bpf_map__fd(const struct bpf_map *map)
3737 {
3738         return map ? map->fd : -EINVAL;
3739 }
3740
3741 const struct bpf_map_def *bpf_map__def(const struct bpf_map *map)
3742 {
3743         return map ? &map->def : ERR_PTR(-EINVAL);
3744 }
3745
3746 const char *bpf_map__name(const struct bpf_map *map)
3747 {
3748         return map ? map->name : NULL;
3749 }
3750
3751 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
3752 {
3753         return map ? map->btf_key_type_id : 0;
3754 }
3755
3756 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
3757 {
3758         return map ? map->btf_value_type_id : 0;
3759 }
3760
3761 int bpf_map__set_priv(struct bpf_map *map, void *priv,
3762                      bpf_map_clear_priv_t clear_priv)
3763 {
3764         if (!map)
3765                 return -EINVAL;
3766
3767         if (map->priv) {
3768                 if (map->clear_priv)
3769                         map->clear_priv(map, map->priv);
3770         }
3771
3772         map->priv = priv;
3773         map->clear_priv = clear_priv;
3774         return 0;
3775 }
3776
3777 void *bpf_map__priv(const struct bpf_map *map)
3778 {
3779         return map ? map->priv : ERR_PTR(-EINVAL);
3780 }
3781
3782 bool bpf_map__is_offload_neutral(const struct bpf_map *map)
3783 {
3784         return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
3785 }
3786
3787 bool bpf_map__is_internal(const struct bpf_map *map)
3788 {
3789         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
3790 }
3791
/*
 * Store @ifindex in map->map_ifindex.
 * @map is dereferenced unconditionally and must not be NULL.
 */
void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
{
	map->map_ifindex = ifindex;
}
3796
3797 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
3798 {
3799         if (!bpf_map_type__is_map_in_map(map->def.type)) {
3800                 pr_warning("error: unsupported map type\n");
3801                 return -EINVAL;
3802         }
3803         if (map->inner_map_fd != -1) {
3804                 pr_warning("error: inner_map_fd already specified\n");
3805                 return -EINVAL;
3806         }
3807         map->inner_map_fd = fd;
3808         return 0;
3809 }
3810
3811 static struct bpf_map *
3812 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
3813 {
3814         ssize_t idx;
3815         struct bpf_map *s, *e;
3816
3817         if (!obj || !obj->maps)
3818                 return NULL;
3819
3820         s = obj->maps;
3821         e = obj->maps + obj->nr_maps;
3822
3823         if ((m < s) || (m >= e)) {
3824                 pr_warning("error in %s: map handler doesn't belong to object\n",
3825                            __func__);
3826                 return NULL;
3827         }
3828
3829         idx = (m - obj->maps) + i;
3830         if (idx >= obj->nr_maps || idx < 0)
3831                 return NULL;
3832         return &obj->maps[idx];
3833 }
3834
3835 struct bpf_map *
3836 bpf_map__next(const struct bpf_map *prev, const struct bpf_object *obj)
3837 {
3838         if (prev == NULL)
3839                 return obj->maps;
3840
3841         return __bpf_map__iter(prev, obj, 1);
3842 }
3843
3844 struct bpf_map *
3845 bpf_map__prev(const struct bpf_map *next, const struct bpf_object *obj)
3846 {
3847         if (next == NULL) {
3848                 if (!obj->nr_maps)
3849                         return NULL;
3850                 return obj->maps + obj->nr_maps - 1;
3851         }
3852
3853         return __bpf_map__iter(next, obj, -1);
3854 }
3855
3856 struct bpf_map *
3857 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
3858 {
3859         struct bpf_map *pos;
3860
3861         bpf_object__for_each_map(pos, obj) {
3862                 if (pos->name && !strcmp(pos->name, name))
3863                         return pos;
3864         }
3865         return NULL;
3866 }
3867
/*
 * Return the FD of the map called @name in @obj. A failed lookup is
 * passed to bpf_map__fd() as NULL, which returns -EINVAL for it.
 */
int
bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
{
	struct bpf_map *map = bpf_object__find_map_by_name(obj, name);

	return bpf_map__fd(map);
}
3873
/*
 * Lookup by offset is not implemented: both arguments are ignored and
 * ERR_PTR(-ENOTSUP) is always returned.
 */
struct bpf_map *
bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
{
	return ERR_PTR(-ENOTSUP);
}
3879
/*
 * Extract the error code from a pointer returned by a libbpf API:
 * the encoded error if @ptr is an error pointer, 0 otherwise.
 */
long libbpf_get_error(const void *ptr)
{
	long err = PTR_ERR_OR_ZERO(ptr);

	return err;
}
3884
3885 int bpf_prog_load(const char *file, enum bpf_prog_type type,
3886                   struct bpf_object **pobj, int *prog_fd)
3887 {
3888         struct bpf_prog_load_attr attr;
3889
3890         memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
3891         attr.file = file;
3892         attr.prog_type = type;
3893         attr.expected_attach_type = 0;
3894
3895         return bpf_prog_load_xattr(&attr, pobj, prog_fd);
3896 }
3897
3898 int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
3899                         struct bpf_object **pobj, int *prog_fd)
3900 {
3901         struct bpf_object_open_attr open_attr = {};
3902         struct bpf_program *prog, *first_prog = NULL;
3903         enum bpf_attach_type expected_attach_type;
3904         enum bpf_prog_type prog_type;
3905         struct bpf_object *obj;
3906         struct bpf_map *map;
3907         int err;
3908
3909         if (!attr)
3910                 return -EINVAL;
3911         if (!attr->file)
3912                 return -EINVAL;
3913
3914         open_attr.file = attr->file;
3915         open_attr.prog_type = attr->prog_type;
3916
3917         obj = bpf_object__open_xattr(&open_attr);
3918         if (IS_ERR_OR_NULL(obj))
3919                 return -ENOENT;
3920
3921         bpf_object__for_each_program(prog, obj) {
3922                 /*
3923                  * If type is not specified, try to guess it based on
3924                  * section name.
3925                  */
3926                 prog_type = attr->prog_type;
3927                 prog->prog_ifindex = attr->ifindex;
3928                 expected_attach_type = attr->expected_attach_type;
3929                 if (prog_type == BPF_PROG_TYPE_UNSPEC) {
3930                         err = bpf_program__identify_section(prog, &prog_type,
3931                                                             &expected_attach_type);
3932                         if (err < 0) {
3933                                 bpf_object__close(obj);
3934                                 return -EINVAL;
3935                         }
3936                 }
3937
3938                 bpf_program__set_type(prog, prog_type);
3939                 bpf_program__set_expected_attach_type(prog,
3940                                                       expected_attach_type);
3941
3942                 prog->log_level = attr->log_level;
3943                 prog->prog_flags = attr->prog_flags;
3944                 if (!first_prog)
3945                         first_prog = prog;
3946         }
3947
3948         bpf_object__for_each_map(map, obj) {
3949                 if (!bpf_map__is_offload_neutral(map))
3950                         map->map_ifindex = attr->ifindex;
3951         }
3952
3953         if (!first_prog) {
3954                 pr_warning("object file doesn't contain bpf program\n");
3955                 bpf_object__close(obj);
3956                 return -ENOENT;
3957         }
3958
3959         err = bpf_object__load(obj);
3960         if (err) {
3961                 bpf_object__close(obj);
3962                 return -EINVAL;
3963         }
3964
3965         *pobj = obj;
3966         *prog_fd = bpf_program__fd(first_prog);
3967         return 0;
3968 }
3969
/* Base of every link; @destroy releases whatever the concrete link holds. */
struct bpf_link {
	int (*destroy)(struct bpf_link *link);
};

/*
 * Run the destroy callback of @link and free @link.
 * Returns the callback's result; a NULL @link is a no-op returning 0.
 */
int bpf_link__destroy(struct bpf_link *link)
{
	int ret = 0;

	if (link) {
		ret = link->destroy(link);
		free(link);
	}

	return ret;
}
3986
3987 struct bpf_link_fd {
3988         struct bpf_link link; /* has to be at the top of struct */
3989         int fd; /* hook FD */
3990 };
3991
3992 static int bpf_link__destroy_perf_event(struct bpf_link *link)
3993 {
3994         struct bpf_link_fd *l = (void *)link;
3995         int err;
3996
3997         err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0);
3998         if (err)
3999                 err = -errno;
4000
4001         close(l->fd);
4002         return err;
4003 }
4004
4005 struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
4006                                                 int pfd)
4007 {
4008         char errmsg[STRERR_BUFSIZE];
4009         struct bpf_link_fd *link;
4010         int prog_fd, err;
4011
4012         if (pfd < 0) {
4013                 pr_warning("program '%s': invalid perf event FD %d\n",
4014                            bpf_program__title(prog, false), pfd);
4015                 return ERR_PTR(-EINVAL);
4016         }
4017         prog_fd = bpf_program__fd(prog);
4018         if (prog_fd < 0) {
4019                 pr_warning("program '%s': can't attach BPF program w/o FD (did you load it?)\n",
4020                            bpf_program__title(prog, false));
4021                 return ERR_PTR(-EINVAL);
4022         }
4023
4024         link = malloc(sizeof(*link));
4025         if (!link)
4026                 return ERR_PTR(-ENOMEM);
4027         link->link.destroy = &bpf_link__destroy_perf_event;
4028         link->fd = pfd;
4029
4030         if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
4031                 err = -errno;
4032                 free(link);
4033                 pr_warning("program '%s': failed to attach to pfd %d: %s\n",
4034                            bpf_program__title(prog, false), pfd,
4035                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
4036                 return ERR_PTR(err);
4037         }
4038         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
4039                 err = -errno;
4040                 free(link);
4041                 pr_warning("program '%s': failed to enable pfd %d: %s\n",
4042                            bpf_program__title(prog, false), pfd,
4043                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
4044                 return ERR_PTR(err);
4045         }
4046         return (struct bpf_link *)link;
4047 }
4048
4049 /*
4050  * this function is expected to parse integer in the range of [0, 2^31-1] from
4051  * given file using scanf format string fmt. If actual parsed value is
4052  * negative, the result might be indistinguishable from error
4053  */
4054 static int parse_uint_from_file(const char *file, const char *fmt)
4055 {
4056         char buf[STRERR_BUFSIZE];
4057         int err, ret;
4058         FILE *f;
4059
4060         f = fopen(file, "r");
4061         if (!f) {
4062                 err = -errno;
4063                 pr_debug("failed to open '%s': %s\n", file,
4064                          libbpf_strerror_r(err, buf, sizeof(buf)));
4065                 return err;
4066         }
4067         err = fscanf(f, fmt, &ret);
4068         if (err != 1) {
4069                 err = err == EOF ? -EIO : -errno;
4070                 pr_debug("failed to parse '%s': %s\n", file,
4071                         libbpf_strerror_r(err, buf, sizeof(buf)));
4072                 fclose(f);
4073                 return err;
4074         }
4075         fclose(f);
4076         return ret;
4077 }
4078
/*
 * Read the kprobe event source type from sysfs.
 * Returns the parsed integer or a negative error from
 * parse_uint_from_file().
 */
static int determine_kprobe_perf_type(void)
{
	return parse_uint_from_file("/sys/bus/event_source/devices/kprobe/type",
				    "%d\n");
}
4085
/*
 * Read the uprobe event source type from sysfs.
 * Returns the parsed integer or a negative error from
 * parse_uint_from_file().
 */
static int determine_uprobe_perf_type(void)
{
	return parse_uint_from_file("/sys/bus/event_source/devices/uprobe/type",
				    "%d\n");
}
4092
/*
 * Read the number following "config:" in the kprobe retprobe format file
 * in sysfs. Returns that number or a negative error from
 * parse_uint_from_file().
 */
static int determine_kprobe_retprobe_bit(void)
{
	return parse_uint_from_file(
		"/sys/bus/event_source/devices/kprobe/format/retprobe",
		"config:%d\n");
}
4099
/*
 * Read the number following "config:" in the uprobe retprobe format file
 * in sysfs. Returns that number or a negative error from
 * parse_uint_from_file().
 */
static int determine_uprobe_retprobe_bit(void)
{
	return parse_uint_from_file(
		"/sys/bus/event_source/devices/uprobe/format/retprobe",
		"config:%d\n");
}
4106
4107 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
4108                                  uint64_t offset, int pid)
4109 {
4110         struct perf_event_attr attr = {};
4111         char errmsg[STRERR_BUFSIZE];
4112         int type, pfd, err;
4113
4114         type = uprobe ? determine_uprobe_perf_type()
4115                       : determine_kprobe_perf_type();
4116         if (type < 0) {
4117                 pr_warning("failed to determine %s perf type: %s\n",
4118                            uprobe ? "uprobe" : "kprobe",
4119                            libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
4120                 return type;
4121         }
4122         if (retprobe) {
4123                 int bit = uprobe ? determine_uprobe_retprobe_bit()
4124                                  : determine_kprobe_retprobe_bit();
4125
4126                 if (bit < 0) {
4127                         pr_warning("failed to determine %s retprobe bit: %s\n",
4128                                    uprobe ? "uprobe" : "kprobe",
4129                                    libbpf_strerror_r(bit, errmsg,
4130                                                      sizeof(errmsg)));
4131                         return bit;
4132                 }
4133                 attr.config |= 1 << bit;
4134         }
4135         attr.size = sizeof(attr);
4136         attr.type = type;
4137         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
4138         attr.config2 = offset;           /* kprobe_addr or probe_offset */
4139
4140         /* pid filter is meaningful only for uprobes */
4141         pfd = syscall(__NR_perf_event_open, &attr,
4142                       pid < 0 ? -1 : pid /* pid */,
4143                       pid == -1 ? 0 : -1 /* cpu */,
4144                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
4145         if (pfd < 0) {
4146                 err = -errno;
4147                 pr_warning("%s perf_event_open() failed: %s\n",
4148                            uprobe ? "uprobe" : "kprobe",
4149                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
4150                 return err;
4151         }
4152         return pfd;
4153 }
4154
4155 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
4156                                             bool retprobe,
4157                                             const char *func_name)
4158 {
4159         char errmsg[STRERR_BUFSIZE];
4160         struct bpf_link *link;
4161         int pfd, err;
4162
4163         pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
4164                                     0 /* offset */, -1 /* pid */);
4165         if (pfd < 0) {
4166                 pr_warning("program '%s': failed to create %s '%s' perf event: %s\n",
4167                            bpf_program__title(prog, false),
4168                            retprobe ? "kretprobe" : "kprobe", func_name,
4169                            libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
4170                 return ERR_PTR(pfd);
4171         }
4172         link = bpf_program__attach_perf_event(prog, pfd);
4173         if (IS_ERR(link)) {
4174                 close(pfd);
4175                 err = PTR_ERR(link);
4176                 pr_warning("program '%s': failed to attach to %s '%s': %s\n",
4177                            bpf_program__title(prog, false),
4178                            retprobe ? "kretprobe" : "kprobe", func_name,
4179                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
4180                 return link;
4181         }
4182         return link;
4183 }
4184
4185 struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
4186                                             bool retprobe, pid_t pid,
4187                                             const char *binary_path,
4188                                             size_t func_offset)
4189 {
4190         char errmsg[STRERR_BUFSIZE];
4191         struct bpf_link *link;
4192         int pfd, err;
4193
4194         pfd = perf_event_open_probe(true /* uprobe */, retprobe,
4195                                     binary_path, func_offset, pid);
4196         if (pfd < 0) {
4197                 pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
4198                            bpf_program__title(prog, false),
4199                            retprobe ? "uretprobe" : "uprobe",
4200                            binary_path, func_offset,
4201                            libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
4202                 return ERR_PTR(pfd);
4203         }
4204         link = bpf_program__attach_perf_event(prog, pfd);
4205         if (IS_ERR(link)) {
4206                 close(pfd);
4207                 err = PTR_ERR(link);
4208                 pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n",
4209                            bpf_program__title(prog, false),
4210                            retprobe ? "uretprobe" : "uprobe",
4211                            binary_path, func_offset,
4212                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
4213                 return link;
4214         }
4215         return link;
4216 }
4217
4218 static int determine_tracepoint_id(const char *tp_category,
4219                                    const char *tp_name)
4220 {
4221         char file[PATH_MAX];
4222         int ret;
4223
4224         ret = snprintf(file, sizeof(file),
4225                        "/sys/kernel/debug/tracing/events/%s/%s/id",
4226                        tp_category, tp_name);
4227         if (ret < 0)
4228                 return -errno;
4229         if (ret >= sizeof(file)) {
4230                 pr_debug("tracepoint %s/%s path is too long\n",
4231                          tp_category, tp_name);
4232                 return -E2BIG;
4233         }
4234         return parse_uint_from_file(file, "%d\n");
4235 }
4236
4237 static int perf_event_open_tracepoint(const char *tp_category,
4238                                       const char *tp_name)
4239 {
4240         struct perf_event_attr attr = {};
4241         char errmsg[STRERR_BUFSIZE];
4242         int tp_id, pfd, err;
4243
4244         tp_id = determine_tracepoint_id(tp_category, tp_name);
4245         if (tp_id < 0) {
4246                 pr_warning("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
4247                            tp_category, tp_name,
4248                            libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
4249                 return tp_id;
4250         }
4251
4252         attr.type = PERF_TYPE_TRACEPOINT;
4253         attr.size = sizeof(attr);
4254         attr.config = tp_id;
4255
4256         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
4257                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
4258         if (pfd < 0) {
4259                 err = -errno;
4260                 pr_warning("tracepoint '%s/%s' perf_event_open() failed: %s\n",
4261                            tp_category, tp_name,
4262                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
4263                 return err;
4264         }
4265         return pfd;
4266 }
4267
4268 struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
4269                                                 const char *tp_category,
4270                                                 const char *tp_name)
4271 {
4272         char errmsg[STRERR_BUFSIZE];
4273         struct bpf_link *link;
4274         int pfd, err;
4275
4276         pfd = perf_event_open_tracepoint(tp_category, tp_name);
4277         if (pfd < 0) {
4278                 pr_warning("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
4279                            bpf_program__title(prog, false),
4280                            tp_category, tp_name,
4281                            libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
4282                 return ERR_PTR(pfd);
4283         }
4284         link = bpf_program__attach_perf_event(prog, pfd);
4285         if (IS_ERR(link)) {
4286                 close(pfd);
4287                 err = PTR_ERR(link);
4288                 pr_warning("program '%s': failed to attach to tracepoint '%s/%s': %s\n",
4289                            bpf_program__title(prog, false),
4290                            tp_category, tp_name,
4291                            libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
4292                 return link;
4293         }
4294         return link;
4295 }
4296
4297 static int bpf_link__destroy_fd(struct bpf_link *link)
4298 {
4299         struct bpf_link_fd *l = (void *)link;
4300
4301         return close(l->fd);
4302 }
4303
4304 struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
4305                                                     const char *tp_name)
4306 {
4307         char errmsg[STRERR_BUFSIZE];
4308         struct bpf_link_fd *link;
4309         int prog_fd, pfd;
4310
4311         prog_fd = bpf_program__fd(prog);
4312         if (prog_fd < 0) {
4313                 pr_warning("program '%s': can't attach before loaded\n",
4314                            bpf_program__title(prog, false));
4315                 return ERR_PTR(-EINVAL);
4316         }
4317
4318         link = malloc(sizeof(*link));
4319         if (!link)
4320                 return ERR_PTR(-ENOMEM);
4321         link->link.destroy = &bpf_link__destroy_fd;
4322
4323         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
4324         if (pfd < 0) {
4325                 pfd = -errno;
4326                 free(link);
4327                 pr_warning("program '%s': failed to attach to raw tracepoint '%s': %s\n",
4328                            bpf_program__title(prog, false), tp_name,
4329                            libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
4330                 return ERR_PTR(pfd);
4331         }
4332         link->fd = pfd;
4333         return (struct bpf_link *)link;
4334 }
4335
4336 enum bpf_perf_event_ret
4337 bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
4338                            void **copy_mem, size_t *copy_size,
4339                            bpf_perf_event_print_t fn, void *private_data)
4340 {
4341         struct perf_event_mmap_page *header = mmap_mem;
4342         __u64 data_head = ring_buffer_read_head(header);
4343         __u64 data_tail = header->data_tail;
4344         void *base = ((__u8 *)header) + page_size;
4345         int ret = LIBBPF_PERF_EVENT_CONT;
4346         struct perf_event_header *ehdr;
4347         size_t ehdr_size;
4348
4349         while (data_head != data_tail) {
4350                 ehdr = base + (data_tail & (mmap_size - 1));
4351                 ehdr_size = ehdr->size;
4352
4353                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
4354                         void *copy_start = ehdr;
4355                         size_t len_first = base + mmap_size - copy_start;
4356                         size_t len_secnd = ehdr_size - len_first;
4357
4358                         if (*copy_size < ehdr_size) {
4359                                 free(*copy_mem);
4360                                 *copy_mem = malloc(ehdr_size);
4361                                 if (!*copy_mem) {
4362                                         *copy_size = 0;
4363                                         ret = LIBBPF_PERF_EVENT_ERROR;
4364                                         break;
4365                                 }
4366                                 *copy_size = ehdr_size;
4367                         }
4368
4369                         memcpy(*copy_mem, copy_start, len_first);
4370                         memcpy(*copy_mem + len_first, base, len_secnd);
4371                         ehdr = *copy_mem;
4372                 }
4373
4374                 ret = fn(ehdr, private_data);
4375                 data_tail += ehdr_size;
4376                 if (ret != LIBBPF_PERF_EVENT_CONT)
4377                         break;
4378         }
4379
4380         ring_buffer_write_tail(header, data_tail);
4381         return ret;
4382 }
4383
4384 struct perf_buffer;
4385
/*
 * Internal parameter bundle consumed by __perf_buffer__new(); filled in by
 * perf_buffer__new() and perf_buffer__new_raw() from their public options.
 */
4386 struct perf_buffer_params {
4387         struct perf_event_attr *attr; /* attr used to open each per-CPU perf event */
4388         /* if event_cb is specified, it takes precedence */
4389         perf_buffer_event_fn event_cb;
4390         /* sample_cb and lost_cb are higher-level common-case callbacks */
4391         perf_buffer_sample_fn sample_cb;
4392         perf_buffer_lost_fn lost_cb;
4393         void *ctx; /* copied into the perf_buffer and passed to callbacks */
4394         int cpu_cnt; /* > 0: use cpus[]/map_keys[]; otherwise CPUs 0..N-1 are used */
4395         int *cpus; /* CPU number for each buffer, read only when cpu_cnt > 0 */
4396         int *map_keys; /* map key for each buffer, read only when cpu_cnt > 0 */
4397 };
4398
/* State of one per-CPU perf ring buffer owned by a struct perf_buffer. */
4399 struct perf_cpu_buf {
4400         struct perf_buffer *pb; /* back-pointer to the owning perf_buffer */
4401         void *base; /* mmap()'ed memory */
4402         void *buf; /* for reconstructing segmented data */
4403         size_t buf_size; /* current capacity of buf */
4404         int fd; /* perf event FD returned by perf_event_open() */
4405         int cpu; /* CPU the perf event was opened on */
4406         int map_key; /* key under which fd is stored in the BPF map */
4407 };
4408
/*
 * A set of per-CPU perf ring buffers registered in a
 * BPF_MAP_TYPE_PERF_EVENT_ARRAY map and polled through one epoll instance.
 */
4409 struct perf_buffer {
4410         perf_buffer_event_fn event_cb; /* if set, receives every raw record */
4411         perf_buffer_sample_fn sample_cb; /* called for PERF_RECORD_SAMPLE */
4412         perf_buffer_lost_fn lost_cb; /* called for PERF_RECORD_LOST */
4413         void *ctx; /* passed into callbacks */
4414
4415         size_t page_size; /* getpagesize(); size of the control page */
4416         size_t mmap_size; /* data area size: page_size * page_cnt */
4417         struct perf_cpu_buf **cpu_bufs; /* cpu_cnt entries */
4418         struct epoll_event *events; /* cpu_cnt entries, reused by epoll_wait() */
4419         int cpu_cnt;
4420         int epoll_fd; /* epoll instance FD watching all per-CPU perf event FDs */
4421         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
4422 };
4423
4424 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
4425                                       struct perf_cpu_buf *cpu_buf)
4426 {
4427         if (!cpu_buf)
4428                 return;
4429         if (cpu_buf->base &&
4430             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
4431                 pr_warning("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
4432         if (cpu_buf->fd >= 0) {
4433                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
4434                 close(cpu_buf->fd);
4435         }
4436         free(cpu_buf->buf);
4437         free(cpu_buf);
4438 }
4439
4440 void perf_buffer__free(struct perf_buffer *pb)
4441 {
4442         int i;
4443
4444         if (!pb)
4445                 return;
4446         if (pb->cpu_bufs) {
4447                 for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
4448                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
4449
4450                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
4451                         perf_buffer__free_cpu_buf(pb, cpu_buf);
4452                 }
4453                 free(pb->cpu_bufs);
4454         }
4455         if (pb->epoll_fd >= 0)
4456                 close(pb->epoll_fd);
4457         free(pb->events);
4458         free(pb);
4459 }
4460
4461 static struct perf_cpu_buf *
4462 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
4463                           int cpu, int map_key)
4464 {
4465         struct perf_cpu_buf *cpu_buf;
4466         char msg[STRERR_BUFSIZE];
4467         int err;
4468
4469         cpu_buf = calloc(1, sizeof(*cpu_buf));
4470         if (!cpu_buf)
4471                 return ERR_PTR(-ENOMEM);
4472
4473         cpu_buf->pb = pb;
4474         cpu_buf->cpu = cpu;
4475         cpu_buf->map_key = map_key;
4476
4477         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
4478                               -1, PERF_FLAG_FD_CLOEXEC);
4479         if (cpu_buf->fd < 0) {
4480                 err = -errno;
4481                 pr_warning("failed to open perf buffer event on cpu #%d: %s\n",
4482                            cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
4483                 goto error;
4484         }
4485
4486         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
4487                              PROT_READ | PROT_WRITE, MAP_SHARED,
4488                              cpu_buf->fd, 0);
4489         if (cpu_buf->base == MAP_FAILED) {
4490                 cpu_buf->base = NULL;
4491                 err = -errno;
4492                 pr_warning("failed to mmap perf buffer on cpu #%d: %s\n",
4493                            cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
4494                 goto error;
4495         }
4496
4497         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
4498                 err = -errno;
4499                 pr_warning("failed to enable perf buffer event on cpu #%d: %s\n",
4500                            cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
4501                 goto error;
4502         }
4503
4504         return cpu_buf;
4505
4506 error:
4507         perf_buffer__free_cpu_buf(pb, cpu_buf);
4508         return (struct perf_cpu_buf *)ERR_PTR(err);
4509 }
4510
4511 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
4512                                               struct perf_buffer_params *p);
4513
4514 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
4515                                      const struct perf_buffer_opts *opts)
4516 {
4517         struct perf_buffer_params p = {};
4518         struct perf_event_attr attr = {
4519                 .config = PERF_COUNT_SW_BPF_OUTPUT,
4520                 .type = PERF_TYPE_SOFTWARE,
4521                 .sample_type = PERF_SAMPLE_RAW,
4522                 .sample_period = 1,
4523                 .wakeup_events = 1,
4524         };
4525
4526         p.attr = &attr;
4527         p.sample_cb = opts ? opts->sample_cb : NULL;
4528         p.lost_cb = opts ? opts->lost_cb : NULL;
4529         p.ctx = opts ? opts->ctx : NULL;
4530
4531         return __perf_buffer__new(map_fd, page_cnt, &p);
4532 }
4533
4534 struct perf_buffer *
4535 perf_buffer__new_raw(int map_fd, size_t page_cnt,
4536                      const struct perf_buffer_raw_opts *opts)
4537 {
4538         struct perf_buffer_params p = {};
4539
4540         p.attr = opts->attr;
4541         p.event_cb = opts->event_cb;
4542         p.ctx = opts->ctx;
4543         p.cpu_cnt = opts->cpu_cnt;
4544         p.cpus = opts->cpus;
4545         p.map_keys = opts->map_keys;
4546
4547         return __perf_buffer__new(map_fd, page_cnt, &p);
4548 }
4549
4550 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
4551                                               struct perf_buffer_params *p)
4552 {
4553         struct bpf_map_info map = {};
4554         char msg[STRERR_BUFSIZE];
4555         struct perf_buffer *pb;
4556         __u32 map_info_len;
4557         int err, i;
4558
4559         if (page_cnt & (page_cnt - 1)) {
4560                 pr_warning("page count should be power of two, but is %zu\n",
4561                            page_cnt);
4562                 return ERR_PTR(-EINVAL);
4563         }
4564
4565         map_info_len = sizeof(map);
4566         err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);
4567         if (err) {
4568                 err = -errno;
4569                 pr_warning("failed to get map info for map FD %d: %s\n",
4570                            map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
4571                 return ERR_PTR(err);
4572         }
4573
4574         if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
4575                 pr_warning("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
4576                            map.name);
4577                 return ERR_PTR(-EINVAL);
4578         }
4579
4580         pb = calloc(1, sizeof(*pb));
4581         if (!pb)
4582                 return ERR_PTR(-ENOMEM);
4583
4584         pb->event_cb = p->event_cb;
4585         pb->sample_cb = p->sample_cb;
4586         pb->lost_cb = p->lost_cb;
4587         pb->ctx = p->ctx;
4588
4589         pb->page_size = getpagesize();
4590         pb->mmap_size = pb->page_size * page_cnt;
4591         pb->map_fd = map_fd;
4592
4593         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
4594         if (pb->epoll_fd < 0) {
4595                 err = -errno;
4596                 pr_warning("failed to create epoll instance: %s\n",
4597                            libbpf_strerror_r(err, msg, sizeof(msg)));
4598                 goto error;
4599         }
4600
4601         if (p->cpu_cnt > 0) {
4602                 pb->cpu_cnt = p->cpu_cnt;
4603         } else {
4604                 pb->cpu_cnt = libbpf_num_possible_cpus();
4605                 if (pb->cpu_cnt < 0) {
4606                         err = pb->cpu_cnt;
4607                         goto error;
4608                 }
4609                 if (map.max_entries < pb->cpu_cnt)
4610                         pb->cpu_cnt = map.max_entries;
4611         }
4612
4613         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
4614         if (!pb->events) {
4615                 err = -ENOMEM;
4616                 pr_warning("failed to allocate events: out of memory\n");
4617                 goto error;
4618         }
4619         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
4620         if (!pb->cpu_bufs) {
4621                 err = -ENOMEM;
4622                 pr_warning("failed to allocate buffers: out of memory\n");
4623                 goto error;
4624         }
4625
4626         for (i = 0; i < pb->cpu_cnt; i++) {
4627                 struct perf_cpu_buf *cpu_buf;
4628                 int cpu, map_key;
4629
4630                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
4631                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
4632
4633                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
4634                 if (IS_ERR(cpu_buf)) {
4635                         err = PTR_ERR(cpu_buf);
4636                         goto error;
4637                 }
4638
4639                 pb->cpu_bufs[i] = cpu_buf;
4640
4641                 err = bpf_map_update_elem(pb->map_fd, &map_key,
4642                                           &cpu_buf->fd, 0);
4643                 if (err) {
4644                         err = -errno;
4645                         pr_warning("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
4646                                    cpu, map_key, cpu_buf->fd,
4647                                    libbpf_strerror_r(err, msg, sizeof(msg)));
4648                         goto error;
4649                 }
4650
4651                 pb->events[i].events = EPOLLIN;
4652                 pb->events[i].data.ptr = cpu_buf;
4653                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
4654                               &pb->events[i]) < 0) {
4655                         err = -errno;
4656                         pr_warning("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
4657                                    cpu, cpu_buf->fd,
4658                                    libbpf_strerror_r(err, msg, sizeof(msg)));
4659                         goto error;
4660                 }
4661         }
4662
4663         return pb;
4664
4665 error:
4666         if (pb)
4667                 perf_buffer__free(pb);
4668         return ERR_PTR(err);
4669 }
4670
/*
 * View that perf_buffer__process_record() applies to a PERF_RECORD_SAMPLE
 * record: the generic header, a 32-bit payload size, then the payload.
 */
4671 struct perf_sample_raw {
4672         struct perf_event_header header;
4673         uint32_t size; /* passed to sample_cb as the payload length */
4674         char data[0]; /* payload handed to sample_cb */
4675 };
4676
/*
 * View that perf_buffer__process_record() applies to a PERF_RECORD_LOST
 * record; only the lost field is consumed in this file.
 */
4677 struct perf_sample_lost {
4678         struct perf_event_header header;
4679         uint64_t id;
4680         uint64_t lost; /* value reported to lost_cb */
4681         uint64_t sample_id;
4682 };
4683
4684 static enum bpf_perf_event_ret
4685 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
4686 {
4687         struct perf_cpu_buf *cpu_buf = ctx;
4688         struct perf_buffer *pb = cpu_buf->pb;
4689         void *data = e;
4690
4691         /* user wants full control over parsing perf event */
4692         if (pb->event_cb)
4693                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
4694
4695         switch (e->type) {
4696         case PERF_RECORD_SAMPLE: {
4697                 struct perf_sample_raw *s = data;
4698
4699                 if (pb->sample_cb)
4700                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
4701                 break;
4702         }
4703         case PERF_RECORD_LOST: {
4704                 struct perf_sample_lost *s = data;
4705
4706                 if (pb->lost_cb)
4707                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
4708                 break;
4709         }
4710         default:
4711                 pr_warning("unknown perf sample type %d\n", e->type);
4712                 return LIBBPF_PERF_EVENT_ERROR;
4713         }
4714         return LIBBPF_PERF_EVENT_CONT;
4715 }
4716
4717 static int perf_buffer__process_records(struct perf_buffer *pb,
4718                                         struct perf_cpu_buf *cpu_buf)
4719 {
4720         enum bpf_perf_event_ret ret;
4721
4722         ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size,
4723                                          pb->page_size, &cpu_buf->buf,
4724                                          &cpu_buf->buf_size,
4725                                          perf_buffer__process_record, cpu_buf);
4726         if (ret != LIBBPF_PERF_EVENT_CONT)
4727                 return ret;
4728         return 0;
4729 }
4730
4731 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
4732 {
4733         int i, cnt, err;
4734
4735         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
4736         for (i = 0; i < cnt; i++) {
4737                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
4738
4739                 err = perf_buffer__process_records(pb, cpu_buf);
4740                 if (err) {
4741                         pr_warning("error while processing records: %d\n", err);
4742                         return err;
4743                 }
4744         }
4745         return cnt < 0 ? -errno : cnt;
4746 }
4747
/*
 * Describes where one variable-length array of struct bpf_prog_info lives:
 * byte offsets, within that struct, of the array pointer, the element count
 * and (optionally) the per-element size.
 */
4748 struct bpf_prog_info_array_desc {
4749         int     array_offset;   /* e.g. offset of jited_prog_insns */
4750         int     count_offset;   /* e.g. offset of jited_prog_len */
4751         int     size_offset;    /* > 0: offset of rec size,
4752                                  * < 0: fixed size of -size_offset
4753                                  */
4754 };
4755
4756 static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {
4757         [BPF_PROG_INFO_JITED_INSNS] = {
4758                 offsetof(struct bpf_prog_info, jited_prog_insns),
4759                 offsetof(struct bpf_prog_info, jited_prog_len),
4760                 -1,
4761         },
4762         [BPF_PROG_INFO_XLATED_INSNS] = {
4763                 offsetof(struct bpf_prog_info, xlated_prog_insns),
4764                 offsetof(struct bpf_prog_info, xlated_prog_len),
4765                 -1,
4766         },
4767         [BPF_PROG_INFO_MAP_IDS] = {
4768                 offsetof(struct bpf_prog_info, map_ids),
4769                 offsetof(struct bpf_prog_info, nr_map_ids),
4770                 -(int)sizeof(__u32),
4771         },
4772         [BPF_PROG_INFO_JITED_KSYMS] = {
4773                 offsetof(struct bpf_prog_info, jited_ksyms),
4774                 offsetof(struct bpf_prog_info, nr_jited_ksyms),
4775                 -(int)sizeof(__u64),
4776         },
4777         [BPF_PROG_INFO_JITED_FUNC_LENS] = {
4778                 offsetof(struct bpf_prog_info, jited_func_lens),
4779                 offsetof(struct bpf_prog_info, nr_jited_func_lens),
4780                 -(int)sizeof(__u32),
4781         },
4782         [BPF_PROG_INFO_FUNC_INFO] = {
4783                 offsetof(struct bpf_prog_info, func_info),
4784                 offsetof(struct bpf_prog_info, nr_func_info),
4785                 offsetof(struct bpf_prog_info, func_info_rec_size),
4786         },
4787         [BPF_PROG_INFO_LINE_INFO] = {
4788                 offsetof(struct bpf_prog_info, line_info),
4789                 offsetof(struct bpf_prog_info, nr_line_info),
4790                 offsetof(struct bpf_prog_info, line_info_rec_size),
4791         },
4792         [BPF_PROG_INFO_JITED_LINE_INFO] = {
4793                 offsetof(struct bpf_prog_info, jited_line_info),
4794                 offsetof(struct bpf_prog_info, nr_jited_line_info),
4795                 offsetof(struct bpf_prog_info, jited_line_info_rec_size),
4796         },
4797         [BPF_PROG_INFO_PROG_TAGS] = {
4798                 offsetof(struct bpf_prog_info, prog_tags),
4799                 offsetof(struct bpf_prog_info, nr_prog_tags),
4800                 -(int)sizeof(__u8) * BPF_TAG_SIZE,
4801         },
4802
4803 };
4804
4805 static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offset)
4806 {
4807         __u32 *array = (__u32 *)info;
4808
4809         if (offset >= 0)
4810                 return array[offset / sizeof(__u32)];
4811         return -(int)offset;
4812 }
4813
4814 static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, int offset)
4815 {
4816         __u64 *array = (__u64 *)info;
4817
4818         if (offset >= 0)
4819                 return array[offset / sizeof(__u64)];
4820         return -(int)offset;
4821 }
4822
4823 static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset,
4824                                          __u32 val)
4825 {
4826         __u32 *array = (__u32 *)info;
4827
4828         if (offset >= 0)
4829                 array[offset / sizeof(__u32)] = val;
4830 }
4831
4832 static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset,
4833                                          __u64 val)
4834 {
4835         __u64 *array = (__u64 *)info;
4836
4837         if (offset >= 0)
4838                 array[offset / sizeof(__u64)] = val;
4839 }
4840
4841 struct bpf_prog_info_linear *
4842 bpf_program__get_prog_info_linear(int fd, __u64 arrays)
4843 {
4844         struct bpf_prog_info_linear *info_linear;
4845         struct bpf_prog_info info = {};
4846         __u32 info_len = sizeof(info);
4847         __u32 data_len = 0;
4848         int i, err;
4849         void *ptr;
4850
4851         if (arrays >> BPF_PROG_INFO_LAST_ARRAY)
4852                 return ERR_PTR(-EINVAL);
4853
4854         /* step 1: get array dimensions */
4855         err = bpf_obj_get_info_by_fd(fd, &info, &info_len);
4856         if (err) {
4857                 pr_debug("can't get prog info: %s", strerror(errno));
4858                 return ERR_PTR(-EFAULT);
4859         }
4860
4861         /* step 2: calculate total size of all arrays */
4862         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
4863                 bool include_array = (arrays & (1UL << i)) > 0;
4864                 struct bpf_prog_info_array_desc *desc;
4865                 __u32 count, size;
4866
4867                 desc = bpf_prog_info_array_desc + i;
4868
4869                 /* kernel is too old to support this field */
4870                 if (info_len < desc->array_offset + sizeof(__u32) ||
4871                     info_len < desc->count_offset + sizeof(__u32) ||
4872                     (desc->size_offset > 0 && info_len < desc->size_offset))
4873                         include_array = false;
4874
4875                 if (!include_array) {
4876                         arrays &= ~(1UL << i);  /* clear the bit */
4877                         continue;
4878                 }
4879
4880                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
4881                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
4882
4883                 data_len += count * size;
4884         }
4885
4886         /* step 3: allocate continuous memory */
4887         data_len = roundup(data_len, sizeof(__u64));
4888         info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len);
4889         if (!info_linear)
4890                 return ERR_PTR(-ENOMEM);
4891
4892         /* step 4: fill data to info_linear->info */
4893         info_linear->arrays = arrays;
4894         memset(&info_linear->info, 0, sizeof(info));
4895         ptr = info_linear->data;
4896
4897         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
4898                 struct bpf_prog_info_array_desc *desc;
4899                 __u32 count, size;
4900
4901                 if ((arrays & (1UL << i)) == 0)
4902                         continue;
4903
4904                 desc  = bpf_prog_info_array_desc + i;
4905                 count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
4906                 size  = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
4907                 bpf_prog_info_set_offset_u32(&info_linear->info,
4908                                              desc->count_offset, count);
4909                 bpf_prog_info_set_offset_u32(&info_linear->info,
4910                                              desc->size_offset, size);
4911                 bpf_prog_info_set_offset_u64(&info_linear->info,
4912                                              desc->array_offset,
4913                                              ptr_to_u64(ptr));
4914                 ptr += count * size;
4915         }
4916
4917         /* step 5: call syscall again to get required arrays */
4918         err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len);
4919         if (err) {
4920                 pr_debug("can't get prog info: %s", strerror(errno));
4921                 free(info_linear);
4922                 return ERR_PTR(-EFAULT);
4923         }
4924
4925         /* step 6: verify the data */
4926         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
4927                 struct bpf_prog_info_array_desc *desc;
4928                 __u32 v1, v2;
4929
4930                 if ((arrays & (1UL << i)) == 0)
4931                         continue;
4932
4933                 desc = bpf_prog_info_array_desc + i;
4934                 v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
4935                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
4936                                                    desc->count_offset);
4937                 if (v1 != v2)
4938                         pr_warning("%s: mismatch in element count\n", __func__);
4939
4940                 v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
4941                 v2 = bpf_prog_info_read_offset_u32(&info_linear->info,
4942                                                    desc->size_offset);
4943                 if (v1 != v2)
4944                         pr_warning("%s: mismatch in rec size\n", __func__);
4945         }
4946
4947         /* step 7: update info_len and data_len */
4948         info_linear->info_len = sizeof(struct bpf_prog_info);
4949         info_linear->data_len = data_len;
4950
4951         return info_linear;
4952 }
4953
4954 void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear)
4955 {
4956         int i;
4957
4958         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
4959                 struct bpf_prog_info_array_desc *desc;
4960                 __u64 addr, offs;
4961
4962                 if ((info_linear->arrays & (1UL << i)) == 0)
4963                         continue;
4964
4965                 desc = bpf_prog_info_array_desc + i;
4966                 addr = bpf_prog_info_read_offset_u64(&info_linear->info,
4967                                                      desc->array_offset);
4968                 offs = addr - ptr_to_u64(info_linear->data);
4969                 bpf_prog_info_set_offset_u64(&info_linear->info,
4970                                              desc->array_offset, offs);
4971         }
4972 }
4973
4974 void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
4975 {
4976         int i;
4977
4978         for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) {
4979                 struct bpf_prog_info_array_desc *desc;
4980                 __u64 addr, offs;
4981
4982                 if ((info_linear->arrays & (1UL << i)) == 0)
4983                         continue;
4984
4985                 desc = bpf_prog_info_array_desc + i;
4986                 offs = bpf_prog_info_read_offset_u64(&info_linear->info,
4987                                                      desc->array_offset);
4988                 addr = offs + ptr_to_u64(info_linear->data);
4989                 bpf_prog_info_set_offset_u64(&info_linear->info,
4990                                              desc->array_offset, addr);
4991         }
4992 }
4993
/*
 * Return the number of possible CPUs as listed in
 * /sys/devices/system/cpu/possible.
 *
 * The file content is parsed as a comma-separated list of entries, each
 * either a single CPU number ("N") or an inclusive range ("N-M"); the sizes
 * of all entries are summed up.
 *
 * A successfully computed count is cached in a static variable and returned
 * directly by later calls. The cache is only written once the whole file
 * has been parsed and validated, so a failed call never leaves a partial
 * count behind.
 *
 * Returns the positive CPU count on success, or a negative error code:
 *   -errno      if the file cannot be opened or read,
 *   -EOVERFLOW  if the content fills the whole local buffer,
 *   -EINVAL     if the content is empty or malformed.
 */
int libbpf_num_possible_cpus(void)
{
	static const char *fcpu = "/sys/devices/system/cpu/possible";
	static int cpus;
	unsigned int start = 0, end = 0;
	int len, n, il = 0, ir;
	int tmp_cpus = 0;
	char buf[128];
	int error;
	int fd;

	if (cpus > 0)
		return cpus;

	fd = open(fcpu, O_RDONLY);
	if (fd < 0) {
		error = errno;
		pr_warning("Failed to open file %s: %s\n",
			   fcpu, strerror(error));
		return -error;
	}
	len = read(fd, buf, sizeof(buf));
	/* capture errno before close(), which may overwrite it */
	error = errno;
	close(fd);
	if (len <= 0) {
		/* len == 0 means an empty file, which sets no errno */
		error = len ? error : EINVAL;
		pr_warning("Failed to read # of possible cpus from %s: %s\n",
			   fcpu, strerror(error));
		return -error;
	}
	if (len == sizeof(buf)) {
		pr_warning("File %s size overflow\n", fcpu);
		return -EOVERFLOW;
	}
	buf[len] = '\0';

	/* ir runs up to and including len so the final entry is parsed too */
	for (ir = 0; ir <= len; ir++) {
		/* Each sub string separated by ',' has format \d+-\d+ or \d+ */
		if (buf[ir] != ',' && buf[ir] != '\0')
			continue;

		buf[ir] = '\0';
		n = sscanf(&buf[il], "%u-%u", &start, &end);
		if (n == 1)
			end = start;
		/*
		 * Treat a reversed range like any other malformed entry:
		 * end - start + 1 would wrap around in unsigned arithmetic.
		 */
		if (n <= 0 || end < start) {
			pr_warning("Failed to get # CPUs from %s\n",
				   &buf[il]);
			return -EINVAL;
		}
		tmp_cpus += end - start + 1;
		il = ir + 1;
	}
	if (tmp_cpus <= 0) {
		pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);
		return -EINVAL;
	}

	/* publish the result only after it has been fully validated */
	cpus = tmp_cpus;
	return tmp_cpus;
}