1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 /* Copyright (c) 2019 Facebook */
13 #include "str_error.h"
14 #include "libbpf_internal.h"
16 #define BPF_CORE_SPEC_MAX_LEN 64
18 /* represents BPF CO-RE field or array element accessor */
19 struct bpf_core_accessor {
20 __u32 type_id; /* struct/union type or array element type */
21 __u32 idx; /* field index or array index */
22 const char *name; /* field name or NULL for array accessor */
25 struct bpf_core_spec {
26 const struct btf *btf;
27 /* high-level spec: named fields and array indices only */
28 struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN];
29 /* original unresolved (no skip_mods_or_typedefs) root type ID */
31 /* CO-RE relocation kind */
32 enum bpf_core_relo_kind relo_kind;
33 /* high-level spec length */
35 /* raw, low-level spec: 1-to-1 with accessor spec string */
36 int raw_spec[BPF_CORE_SPEC_MAX_LEN];
39 /* field bit offset represented by spec */
43 static bool is_flex_arr(const struct btf *btf,
44 const struct bpf_core_accessor *acc,
45 const struct btf_array *arr)
47 const struct btf_type *t;
49 /* not a flexible array, if not inside a struct or has non-zero size */
50 if (!acc->name || arr->nelems > 0)
53 /* has to be the last member of enclosing struct */
54 t = btf__type_by_id(btf, acc->type_id);
55 return acc->idx == btf_vlen(t) - 1;
58 static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)
61 case BPF_FIELD_BYTE_OFFSET: return "byte_off";
62 case BPF_FIELD_BYTE_SIZE: return "byte_sz";
63 case BPF_FIELD_EXISTS: return "field_exists";
64 case BPF_FIELD_SIGNED: return "signed";
65 case BPF_FIELD_LSHIFT_U64: return "lshift_u64";
66 case BPF_FIELD_RSHIFT_U64: return "rshift_u64";
67 case BPF_TYPE_ID_LOCAL: return "local_type_id";
68 case BPF_TYPE_ID_TARGET: return "target_type_id";
69 case BPF_TYPE_EXISTS: return "type_exists";
70 case BPF_TYPE_SIZE: return "type_size";
71 case BPF_ENUMVAL_EXISTS: return "enumval_exists";
72 case BPF_ENUMVAL_VALUE: return "enumval_value";
73 default: return "unknown";
77 static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)
80 case BPF_FIELD_BYTE_OFFSET:
81 case BPF_FIELD_BYTE_SIZE:
82 case BPF_FIELD_EXISTS:
83 case BPF_FIELD_SIGNED:
84 case BPF_FIELD_LSHIFT_U64:
85 case BPF_FIELD_RSHIFT_U64:
92 static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)
95 case BPF_TYPE_ID_LOCAL:
96 case BPF_TYPE_ID_TARGET:
105 static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)
108 case BPF_ENUMVAL_EXISTS:
109 case BPF_ENUMVAL_VALUE:
117 * Turn bpf_core_relo into a low- and high-level spec representation,
118 * validating correctness along the way, as well as calculating resulting
119 * field bit offset, specified by accessor string. Low-level spec captures
120 * every single level of nestedness, including traversing anonymous
121 * struct/union members. High-level one only captures semantically meaningful
122 * "turning points": named fields and array indices.
123 * E.g., for this case:
134 * struct sample *s = ...;
136 * int x = &s->a[3]; // access string = '0:1:2:3'
138 * Low-level spec has 1:1 mapping with each element of access string (it's
139 * just a parsed access string representation): [0, 1, 2, 3].
141 * High-level spec will capture only 3 points:
142 * - initial zero-index access by pointer (&s->... is the same as &s[0]...);
143 * - field 'a' access (corresponds to '2' in low-level spec);
144 * - array element #3 access (corresponds to '3' in low-level spec).
146 * Type-based relocations (TYPE_EXISTS/TYPE_SIZE,
147 * TYPE_ID_LOCAL/TYPE_ID_TARGET) don't capture any field information. Their
148 * spec and raw_spec are kept empty.
150 * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access
151 * string to specify enumerator's value index that needs to be relocated.
153 static int bpf_core_parse_spec(const struct btf *btf,
155 const char *spec_str,
156 enum bpf_core_relo_kind relo_kind,
157 struct bpf_core_spec *spec)
159 int access_idx, parsed_len, i;
160 struct bpf_core_accessor *acc;
161 const struct btf_type *t;
166 if (str_is_empty(spec_str) || *spec_str == ':')
169 memset(spec, 0, sizeof(*spec));
171 spec->root_type_id = type_id;
172 spec->relo_kind = relo_kind;
174 /* type-based relocations don't have a field access string */
175 if (core_relo_is_type_based(relo_kind)) {
176 if (strcmp(spec_str, "0"))
181 /* parse spec_str="0:1:2:3:4" into array raw_spec=[0, 1, 2, 3, 4] */
183 if (*spec_str == ':')
185 if (sscanf(spec_str, "%d%n", &access_idx, &parsed_len) != 1)
187 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
189 spec_str += parsed_len;
190 spec->raw_spec[spec->raw_len++] = access_idx;
193 if (spec->raw_len == 0)
196 t = skip_mods_and_typedefs(btf, type_id, &id);
200 access_idx = spec->raw_spec[0];
201 acc = &spec->spec[0];
203 acc->idx = access_idx;
206 if (core_relo_is_enumval_based(relo_kind)) {
207 if (!btf_is_enum(t) || spec->raw_len > 1 || access_idx >= btf_vlen(t))
210 /* record enumerator name in a first accessor */
211 acc->name = btf__name_by_offset(btf, btf_enum(t)[access_idx].name_off);
215 if (!core_relo_is_field_based(relo_kind))
218 sz = btf__resolve_size(btf, id);
221 spec->bit_offset = access_idx * sz * 8;
223 for (i = 1; i < spec->raw_len; i++) {
224 t = skip_mods_and_typedefs(btf, id, &id);
228 access_idx = spec->raw_spec[i];
229 acc = &spec->spec[spec->len];
231 if (btf_is_composite(t)) {
232 const struct btf_member *m;
235 if (access_idx >= btf_vlen(t))
238 bit_offset = btf_member_bit_offset(t, access_idx);
239 spec->bit_offset += bit_offset;
241 m = btf_members(t) + access_idx;
243 name = btf__name_by_offset(btf, m->name_off);
244 if (str_is_empty(name))
248 acc->idx = access_idx;
254 } else if (btf_is_array(t)) {
255 const struct btf_array *a = btf_array(t);
258 t = skip_mods_and_typedefs(btf, a->type, &id);
262 flex = is_flex_arr(btf, acc - 1, a);
263 if (!flex && access_idx >= a->nelems)
266 spec->spec[spec->len].type_id = id;
267 spec->spec[spec->len].idx = access_idx;
270 sz = btf__resolve_size(btf, id);
273 spec->bit_offset += access_idx * sz * 8;
275 pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n",
276 type_id, spec_str, i, id, btf_kind_str(t));
284 /* Check two types for compatibility for the purpose of field access
285 * relocation. const/volatile/restrict and typedefs are skipped to ensure we
286 * are relocating semantically compatible entities:
287 * - any two STRUCTs/UNIONs are compatible and can be mixed;
288 * - any two FWDs are compatible, if their names match (modulo flavor suffix);
289 * - any two PTRs are always compatible;
290 * - for ENUMs, names should be the same (ignoring flavor suffix) or at
291 * least one of enums should be anonymous;
292 * - for ENUMs, check sizes, names are ignored;
293 * - for INT, size and signedness are ignored;
294 * - any two FLOATs are always compatible;
295 * - for ARRAY, dimensionality is ignored, element types are checked for
296 * compatibility recursively;
297 * - everything else shouldn't be ever a target of relocation.
298 * These rules are not set in stone and probably will be adjusted as we get
299 * more experience with using BPF CO-RE relocations.
301 static int bpf_core_fields_are_compat(const struct btf *local_btf,
303 const struct btf *targ_btf,
306 const struct btf_type *local_type, *targ_type;
309 local_type = skip_mods_and_typedefs(local_btf, local_id, &local_id);
310 targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
311 if (!local_type || !targ_type)
314 if (btf_is_composite(local_type) && btf_is_composite(targ_type))
316 if (btf_kind(local_type) != btf_kind(targ_type))
319 switch (btf_kind(local_type)) {
324 case BTF_KIND_ENUM: {
325 const char *local_name, *targ_name;
326 size_t local_len, targ_len;
328 local_name = btf__name_by_offset(local_btf,
329 local_type->name_off);
330 targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
331 local_len = bpf_core_essential_name_len(local_name);
332 targ_len = bpf_core_essential_name_len(targ_name);
333 /* one of them is anonymous or both w/ same flavor-less names */
334 return local_len == 0 || targ_len == 0 ||
335 (local_len == targ_len &&
336 strncmp(local_name, targ_name, local_len) == 0);
339 /* just reject deprecated bitfield-like integers; all other
340 * integers are by default compatible between each other
342 return btf_int_offset(local_type) == 0 &&
343 btf_int_offset(targ_type) == 0;
345 local_id = btf_array(local_type)->type;
346 targ_id = btf_array(targ_type)->type;
349 pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n",
350 btf_kind(local_type), local_id, targ_id);
356 * Given single high-level named field accessor in local type, find
357 * corresponding high-level accessor for a target type. Along the way,
358 * maintain low-level spec for target as well. Also keep updating target
361 * Searching is performed through recursive exhaustive enumeration of all
362 * fields of a struct/union. If there are any anonymous (embedded)
363 * structs/unions, they are recursively searched as well. If field with
364 * desired name is found, check compatibility between local and target types,
365 * before returning result.
367 * 1 is returned, if field is found.
368 * 0 is returned if no compatible field is found.
369 * <0 is returned on error.
371 static int bpf_core_match_member(const struct btf *local_btf,
372 const struct bpf_core_accessor *local_acc,
373 const struct btf *targ_btf,
375 struct bpf_core_spec *spec,
378 const struct btf_type *local_type, *targ_type;
379 const struct btf_member *local_member, *m;
380 const char *local_name, *targ_name;
384 targ_type = skip_mods_and_typedefs(targ_btf, targ_id, &targ_id);
387 if (!btf_is_composite(targ_type))
390 local_id = local_acc->type_id;
391 local_type = btf__type_by_id(local_btf, local_id);
392 local_member = btf_members(local_type) + local_acc->idx;
393 local_name = btf__name_by_offset(local_btf, local_member->name_off);
395 n = btf_vlen(targ_type);
396 m = btf_members(targ_type);
397 for (i = 0; i < n; i++, m++) {
400 bit_offset = btf_member_bit_offset(targ_type, i);
402 /* too deep struct/union/array nesting */
403 if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
406 /* speculate this member will be the good one */
407 spec->bit_offset += bit_offset;
408 spec->raw_spec[spec->raw_len++] = i;
410 targ_name = btf__name_by_offset(targ_btf, m->name_off);
411 if (str_is_empty(targ_name)) {
412 /* embedded struct/union, we need to go deeper */
413 found = bpf_core_match_member(local_btf, local_acc,
416 if (found) /* either found or error */
418 } else if (strcmp(local_name, targ_name) == 0) {
419 /* matching named field */
420 struct bpf_core_accessor *targ_acc;
422 targ_acc = &spec->spec[spec->len++];
423 targ_acc->type_id = targ_id;
425 targ_acc->name = targ_name;
427 *next_targ_id = m->type;
428 found = bpf_core_fields_are_compat(local_btf,
432 spec->len--; /* pop accessor */
435 /* member turned out not to be what we looked for */
436 spec->bit_offset -= bit_offset;
444 * Try to match local spec to a target type and, if successful, produce full
445 * target spec (high-level, low-level + bit offset).
447 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
448 const struct btf *targ_btf, __u32 targ_id,
449 struct bpf_core_spec *targ_spec)
451 const struct btf_type *targ_type;
452 const struct bpf_core_accessor *local_acc;
453 struct bpf_core_accessor *targ_acc;
456 memset(targ_spec, 0, sizeof(*targ_spec));
457 targ_spec->btf = targ_btf;
458 targ_spec->root_type_id = targ_id;
459 targ_spec->relo_kind = local_spec->relo_kind;
461 if (core_relo_is_type_based(local_spec->relo_kind)) {
462 return bpf_core_types_are_compat(local_spec->btf,
463 local_spec->root_type_id,
467 local_acc = &local_spec->spec[0];
468 targ_acc = &targ_spec->spec[0];
470 if (core_relo_is_enumval_based(local_spec->relo_kind)) {
471 size_t local_essent_len, targ_essent_len;
472 const struct btf_enum *e;
473 const char *targ_name;
475 /* has to resolve to an enum */
476 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id, &targ_id);
477 if (!btf_is_enum(targ_type))
480 local_essent_len = bpf_core_essential_name_len(local_acc->name);
482 for (i = 0, e = btf_enum(targ_type); i < btf_vlen(targ_type); i++, e++) {
483 targ_name = btf__name_by_offset(targ_spec->btf, e->name_off);
484 targ_essent_len = bpf_core_essential_name_len(targ_name);
485 if (targ_essent_len != local_essent_len)
487 if (strncmp(local_acc->name, targ_name, local_essent_len) == 0) {
488 targ_acc->type_id = targ_id;
490 targ_acc->name = targ_name;
492 targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
493 targ_spec->raw_len++;
500 if (!core_relo_is_field_based(local_spec->relo_kind))
503 for (i = 0; i < local_spec->len; i++, local_acc++, targ_acc++) {
504 targ_type = skip_mods_and_typedefs(targ_spec->btf, targ_id,
509 if (local_acc->name) {
510 matched = bpf_core_match_member(local_spec->btf,
513 targ_spec, &targ_id);
517 /* for i=0, targ_id is already treated as array element
518 * type (because it's the original struct), for others
519 * we should find array element type first
522 const struct btf_array *a;
525 if (!btf_is_array(targ_type))
528 a = btf_array(targ_type);
529 flex = is_flex_arr(targ_btf, targ_acc - 1, a);
530 if (!flex && local_acc->idx >= a->nelems)
532 if (!skip_mods_and_typedefs(targ_btf, a->type,
537 /* too deep struct/union/array nesting */
538 if (targ_spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
541 targ_acc->type_id = targ_id;
542 targ_acc->idx = local_acc->idx;
543 targ_acc->name = NULL;
545 targ_spec->raw_spec[targ_spec->raw_len] = targ_acc->idx;
546 targ_spec->raw_len++;
548 sz = btf__resolve_size(targ_btf, targ_id);
551 targ_spec->bit_offset += local_acc->idx * sz * 8;
558 static int bpf_core_calc_field_relo(const char *prog_name,
559 const struct bpf_core_relo *relo,
560 const struct bpf_core_spec *spec,
561 __u32 *val, __u32 *field_sz, __u32 *type_id,
564 const struct bpf_core_accessor *acc;
565 const struct btf_type *t;
566 __u32 byte_off, byte_sz, bit_off, bit_sz, field_type_id;
567 const struct btf_member *m;
568 const struct btf_type *mt;
574 if (relo->kind == BPF_FIELD_EXISTS) {
580 return -EUCLEAN; /* request instruction poisoning */
582 acc = &spec->spec[spec->len - 1];
583 t = btf__type_by_id(spec->btf, acc->type_id);
585 /* a[n] accessor needs special handling */
587 if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
588 *val = spec->bit_offset / 8;
589 /* remember field size for load/store mem size */
590 sz = btf__resolve_size(spec->btf, acc->type_id);
594 *type_id = acc->type_id;
595 } else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
596 sz = btf__resolve_size(spec->btf, acc->type_id);
601 pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
602 prog_name, relo->kind, relo->insn_off / 8);
610 m = btf_members(t) + acc->idx;
611 mt = skip_mods_and_typedefs(spec->btf, m->type, &field_type_id);
612 bit_off = spec->bit_offset;
613 bit_sz = btf_member_bitfield_size(t, acc->idx);
615 bitfield = bit_sz > 0;
618 byte_off = bit_off / 8 / byte_sz * byte_sz;
619 /* figure out smallest int size necessary for bitfield load */
620 while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
622 /* bitfield can't be read with 64-bit read */
623 pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
624 prog_name, relo->kind, relo->insn_off / 8);
628 byte_off = bit_off / 8 / byte_sz * byte_sz;
631 sz = btf__resolve_size(spec->btf, field_type_id);
635 byte_off = spec->bit_offset / 8;
636 bit_sz = byte_sz * 8;
639 /* for bitfields, all the relocatable aspects are ambiguous and we
640 * might disagree with compiler, so turn off validation of expected
641 * value, except for signedness
644 *validate = !bitfield;
646 switch (relo->kind) {
647 case BPF_FIELD_BYTE_OFFSET:
651 *type_id = field_type_id;
654 case BPF_FIELD_BYTE_SIZE:
657 case BPF_FIELD_SIGNED:
658 /* enums are always reported as signed here */
659 *val = btf_is_enum(mt) ||
660 (btf_int_encoding(mt) & BTF_INT_SIGNED);
662 *validate = true; /* signedness is never ambiguous */
664 case BPF_FIELD_LSHIFT_U64:
665 #if __BYTE_ORDER == __LITTLE_ENDIAN
666 *val = 64 - (bit_off + bit_sz - byte_off * 8);
668 *val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
671 case BPF_FIELD_RSHIFT_U64:
674 *validate = true; /* right shift is never ambiguous */
676 case BPF_FIELD_EXISTS:
684 static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,
685 const struct bpf_core_spec *spec,
690 /* type-based relos return zero when target type is not found */
696 switch (relo->kind) {
697 case BPF_TYPE_ID_TARGET:
698 *val = spec->root_type_id;
700 case BPF_TYPE_EXISTS:
704 sz = btf__resolve_size(spec->btf, spec->root_type_id);
709 case BPF_TYPE_ID_LOCAL:
710 /* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */
718 static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,
719 const struct bpf_core_spec *spec,
722 const struct btf_type *t;
723 const struct btf_enum *e;
725 switch (relo->kind) {
726 case BPF_ENUMVAL_EXISTS:
729 case BPF_ENUMVAL_VALUE:
731 return -EUCLEAN; /* request instruction poisoning */
732 t = btf__type_by_id(spec->btf, spec->spec[0].type_id);
733 e = btf_enum(t) + spec->spec[0].idx;
743 struct bpf_core_relo_res
745 /* expected value in the instruction, unless validate == false */
747 /* new value that needs to be patched up to */
749 /* relocation unsuccessful, poison instruction, but don't fail load */
751 /* some relocations can't be validated against orig_val */
753 /* for field byte offset relocations of the forms:
754 * *(T *)(rX + <off>) = rY
755 * rX = *(T *)(rY + <off>),
756 * we remember original and resolved field size to adjust direct
757 * memory loads of pointers and integers; this is necessary for 32-bit
758 * host kernel architectures, but also allows to automatically
759 * relocate fields that were resized from, e.g., u32 to u64, etc.
761 bool fail_memsz_adjust;
768 /* Calculate original and target relocation values, given local and target
769 * specs and relocation kind. These values are calculated for each candidate.
770 * If there are multiple candidates, resulting values should all be consistent
771 * with each other. Otherwise, libbpf will refuse to proceed due to ambiguity.
772 * If instruction has to be poisoned, *poison will be set to true.
774 static int bpf_core_calc_relo(const char *prog_name,
775 const struct bpf_core_relo *relo,
777 const struct bpf_core_spec *local_spec,
778 const struct bpf_core_spec *targ_spec,
779 struct bpf_core_relo_res *res)
781 int err = -EOPNOTSUPP;
786 res->validate = true;
787 res->fail_memsz_adjust = false;
788 res->orig_sz = res->new_sz = 0;
789 res->orig_type_id = res->new_type_id = 0;
791 if (core_relo_is_field_based(relo->kind)) {
792 err = bpf_core_calc_field_relo(prog_name, relo, local_spec,
793 &res->orig_val, &res->orig_sz,
794 &res->orig_type_id, &res->validate);
795 err = err ?: bpf_core_calc_field_relo(prog_name, relo, targ_spec,
796 &res->new_val, &res->new_sz,
797 &res->new_type_id, NULL);
800 /* Validate if it's safe to adjust load/store memory size.
801 * Adjustments are performed only if original and new memory
804 res->fail_memsz_adjust = false;
805 if (res->orig_sz != res->new_sz) {
806 const struct btf_type *orig_t, *new_t;
808 orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id);
809 new_t = btf__type_by_id(targ_spec->btf, res->new_type_id);
811 /* There are two use cases in which it's safe to
812 * adjust load/store's mem size:
813 * - reading a 32-bit kernel pointer, while on BPF
814 * side pointers are always 64-bit; in this case
815 * it's safe to "downsize" instruction size due to
816 * pointer being treated as unsigned integer with
817 * zero-extended upper 32-bits;
818 * - reading unsigned integers, again because
819 * zero-extension preserves the value correctly.
821 * In all other cases it's incorrect to attempt to
822 * load/store field because read value will be
823 * incorrect, so we poison relocated instruction.
825 if (btf_is_ptr(orig_t) && btf_is_ptr(new_t))
827 if (btf_is_int(orig_t) && btf_is_int(new_t) &&
828 btf_int_encoding(orig_t) != BTF_INT_SIGNED &&
829 btf_int_encoding(new_t) != BTF_INT_SIGNED)
832 /* mark as invalid mem size adjustment, but this will
833 * only be checked for LDX/STX/ST insns
835 res->fail_memsz_adjust = true;
837 } else if (core_relo_is_type_based(relo->kind)) {
838 err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val);
839 err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val);
840 } else if (core_relo_is_enumval_based(relo->kind)) {
841 err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);
842 err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val);
846 if (err == -EUCLEAN) {
847 /* EUCLEAN is used to signal instruction poisoning request */
850 } else if (err == -EOPNOTSUPP) {
851 /* EOPNOTSUPP means unknown/unsupported relocation */
852 pr_warn("prog '%s': relo #%d: unrecognized CO-RE relocation %s (%d) at insn #%d\n",
853 prog_name, relo_idx, core_relo_kind_str(relo->kind),
854 relo->kind, relo->insn_off / 8);
861 * Turn instruction for which CO_RE relocation failed into invalid one with
862 * distinct signature.
864 static void bpf_core_poison_insn(const char *prog_name, int relo_idx,
865 int insn_idx, struct bpf_insn *insn)
867 pr_debug("prog '%s': relo #%d: substituting insn #%d w/ invalid insn\n",
868 prog_name, relo_idx, insn_idx);
869 insn->code = BPF_JMP | BPF_CALL;
873 /* if this instruction is reachable (not a dead code),
874 * verifier will complain with the following message:
875 * invalid func unknown#195896080
877 insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
880 static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
882 switch (BPF_SIZE(insn->code)) {
883 case BPF_DW: return 8;
884 case BPF_W: return 4;
885 case BPF_H: return 2;
886 case BPF_B: return 1;
891 static int insn_bytes_to_bpf_size(__u32 sz)
894 case 8: return BPF_DW;
895 case 4: return BPF_W;
896 case 2: return BPF_H;
897 case 1: return BPF_B;
903 * Patch relocatable BPF instruction.
905 * Patched value is determined by relocation kind and target specification.
906 * For existence relocations target spec will be NULL if field/type is not found.
907 * Expected insn->imm value is determined using relocation kind and local
908 * spec, and is checked before patching instruction. If actual insn->imm value
909 * is wrong, bail out with error.
911 * Currently supported classes of BPF instruction are:
912 * 1. rX = <imm> (assignment with immediate operand);
913 * 2. rX += <imm> (arithmetic operations with immediate operand);
914 * 3. rX = <imm64> (load with 64-bit immediate value);
915 * 4. rX = *(T *)(rY + <off>), where T is one of {u8, u16, u32, u64};
916 * 5. *(T *)(rX + <off>) = rY, where T is one of {u8, u16, u32, u64};
917 * 6. *(T *)(rX + <off>) = <imm>, where T is one of {u8, u16, u32, u64}.
919 static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn,
920 int insn_idx, const struct bpf_core_relo *relo,
921 int relo_idx, const struct bpf_core_relo_res *res)
923 __u32 orig_val, new_val;
926 class = BPF_CLASS(insn->code);
930 /* poison second part of ldimm64 to avoid confusing error from
931 * verifier about "unknown opcode 00"
933 if (is_ldimm64_insn(insn))
934 bpf_core_poison_insn(prog_name, relo_idx, insn_idx + 1, insn + 1);
935 bpf_core_poison_insn(prog_name, relo_idx, insn_idx, insn);
939 orig_val = res->orig_val;
940 new_val = res->new_val;
945 if (BPF_SRC(insn->code) != BPF_K)
947 if (res->validate && insn->imm != orig_val) {
948 pr_warn("prog '%s': relo #%d: unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n",
950 insn_idx, insn->imm, orig_val, new_val);
953 orig_val = insn->imm;
955 pr_debug("prog '%s': relo #%d: patched insn #%d (ALU/ALU64) imm %u -> %u\n",
956 prog_name, relo_idx, insn_idx,
962 if (res->validate && insn->off != orig_val) {
963 pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDX/ST/STX) value: got %u, exp %u -> %u\n",
964 prog_name, relo_idx, insn_idx, insn->off, orig_val, new_val);
967 if (new_val > SHRT_MAX) {
968 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) value too big: %u\n",
969 prog_name, relo_idx, insn_idx, new_val);
972 if (res->fail_memsz_adjust) {
973 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) accesses field incorrectly. "
974 "Make sure you are accessing pointers, unsigned integers, or fields of matching type and size.\n",
975 prog_name, relo_idx, insn_idx);
979 orig_val = insn->off;
981 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) off %u -> %u\n",
982 prog_name, relo_idx, insn_idx, orig_val, new_val);
984 if (res->new_sz != res->orig_sz) {
985 int insn_bytes_sz, insn_bpf_sz;
987 insn_bytes_sz = insn_bpf_size_to_bytes(insn);
988 if (insn_bytes_sz != res->orig_sz) {
989 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) unexpected mem size: got %d, exp %u\n",
990 prog_name, relo_idx, insn_idx, insn_bytes_sz, res->orig_sz);
994 insn_bpf_sz = insn_bytes_to_bpf_size(res->new_sz);
995 if (insn_bpf_sz < 0) {
996 pr_warn("prog '%s': relo #%d: insn #%d (LDX/ST/STX) invalid new mem size: %u\n",
997 prog_name, relo_idx, insn_idx, res->new_sz);
1001 insn->code = BPF_MODE(insn->code) | insn_bpf_sz | BPF_CLASS(insn->code);
1002 pr_debug("prog '%s': relo #%d: patched insn #%d (LDX/ST/STX) mem_sz %u -> %u\n",
1003 prog_name, relo_idx, insn_idx, res->orig_sz, res->new_sz);
1009 if (!is_ldimm64_insn(insn) ||
1010 insn[0].src_reg != 0 || insn[0].off != 0 ||
1011 insn[1].code != 0 || insn[1].dst_reg != 0 ||
1012 insn[1].src_reg != 0 || insn[1].off != 0) {
1013 pr_warn("prog '%s': relo #%d: insn #%d (LDIMM64) has unexpected form\n",
1014 prog_name, relo_idx, insn_idx);
1018 imm = insn[0].imm + ((__u64)insn[1].imm << 32);
1019 if (res->validate && imm != orig_val) {
1020 pr_warn("prog '%s': relo #%d: unexpected insn #%d (LDIMM64) value: got %llu, exp %u -> %u\n",
1021 prog_name, relo_idx,
1022 insn_idx, (unsigned long long)imm,
1027 insn[0].imm = new_val;
1028 insn[1].imm = 0; /* currently only 32-bit values are supported */
1029 pr_debug("prog '%s': relo #%d: patched insn #%d (LDIMM64) imm64 %llu -> %u\n",
1030 prog_name, relo_idx, insn_idx,
1031 (unsigned long long)imm, new_val);
1035 pr_warn("prog '%s': relo #%d: trying to relocate unrecognized insn #%d, code:0x%x, src:0x%x, dst:0x%x, off:0x%x, imm:0x%x\n",
1036 prog_name, relo_idx, insn_idx, insn->code,
1037 insn->src_reg, insn->dst_reg, insn->off, insn->imm);
1044 /* Output spec definition in the format:
1045 * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,
1046 * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b
1048 static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)
1050 const struct btf_type *t;
1051 const struct btf_enum *e;
1056 type_id = spec->root_type_id;
1057 t = btf__type_by_id(spec->btf, type_id);
1058 s = btf__name_by_offset(spec->btf, t->name_off);
1060 libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? "<anon>" : s);
1062 if (core_relo_is_type_based(spec->relo_kind))
1065 if (core_relo_is_enumval_based(spec->relo_kind)) {
1066 t = skip_mods_and_typedefs(spec->btf, type_id, NULL);
1067 e = btf_enum(t) + spec->raw_spec[0];
1068 s = btf__name_by_offset(spec->btf, e->name_off);
1070 libbpf_print(level, "::%s = %u", s, e->val);
1074 if (core_relo_is_field_based(spec->relo_kind)) {
1075 for (i = 0; i < spec->len; i++) {
1076 if (spec->spec[i].name)
1077 libbpf_print(level, ".%s", spec->spec[i].name);
1078 else if (i > 0 || spec->spec[i].idx > 0)
1079 libbpf_print(level, "[%u]", spec->spec[i].idx);
1082 libbpf_print(level, " (");
1083 for (i = 0; i < spec->raw_len; i++)
1084 libbpf_print(level, "%s%d", i == 0 ? "" : ":", spec->raw_spec[i]);
1086 if (spec->bit_offset % 8)
1087 libbpf_print(level, " @ offset %u.%u)",
1088 spec->bit_offset / 8, spec->bit_offset % 8);
1090 libbpf_print(level, " @ offset %u)", spec->bit_offset / 8);
1096 * CO-RE relocate single instruction.
1098 * The outline and important points of the algorithm:
1099 * 1. For given local type, find corresponding candidate target types.
1100 * Candidate type is a type with the same "essential" name, ignoring
1101 * everything after last triple underscore (___). E.g., `sample`,
1102 * `sample___flavor_one`, `sample___flavor_another_one`, are all candidates
1103 * for each other. Names with triple underscore are referred to as
1104 * "flavors" and are useful, among other things, to allow to
1105 * specify/support incompatible variations of the same kernel struct, which
1106 * might differ between different kernel versions and/or build
1109 * N.B. Struct "flavors" could be generated by bpftool's BTF-to-C
1110 * converter, when deduplicated BTF of a kernel still contains more than
1111 * one different type with the same name. In that case, ___2, ___3, etc
1112 * are appended starting from second name conflict. But struct flavors are
1113 * also useful to be defined "locally", in BPF program, to extract same
1114 * data from incompatible changes between different kernel
1115 * versions/configurations. For instance, to handle field renames between
1116 * kernel versions, one can use two flavors of the struct name with the
1117 * same common name and use conditional relocations to extract that field,
1118 * depending on target kernel version.
1119 * 2. For each candidate type, try to match local specification to this
1120 * candidate target type. Matching involves finding corresponding
1121 * high-level spec accessors, meaning that all named fields should match,
1122 * as well as all array accesses should be within the actual bounds. Also,
1123 * types should be compatible (see bpf_core_fields_are_compat for details).
1124 * 3. It is supported and expected that there might be multiple flavors
1125 * matching the spec. As long as all the specs resolve to the same set of
1126 * offsets across all candidates, there is no error. If there is any
1127 * ambiguity, CO-RE relocation will fail. This is necessary to accommodate
1128 * imperfection of BTF deduplication, which can cause slight duplication of
1129 * the same BTF type, if some directly or indirectly referenced (by
1130 * pointer) type gets resolved to different actual types in different
1131 * object files. If such situation occurs, deduplicated BTF will end up
1132 * with two (or more) structurally identical types, which differ only in
1133 * types they refer to through pointer. This should be OK in most cases and
1135 * 4. Candidate types search is performed by linearly scanning through all
1136 * types in target BTF. It is anticipated that this is overall more
1137 * efficient memory-wise and not significantly worse (if not better)
1138 * CPU-wise compared to prebuilding a map from all local type names to
1139 * a list of candidate type names. It's also sped up by caching resolved
1140 * list of matching candidates per each local "root" type ID, that has at
1141 * least one bpf_core_relo associated with it. This list is shared
1142 * between multiple relocations for the same type ID and is updated as some
1143 * of the candidates are pruned due to structural incompatibility.
1145 int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,
1147 const struct bpf_core_relo *relo,
1149 const struct btf *local_btf,
1150 struct bpf_core_cand_list *cands)
1152 struct bpf_core_spec local_spec, cand_spec, targ_spec = {};
1153 struct bpf_core_relo_res cand_res, targ_res;
1154 const struct btf_type *local_type;
1155 const char *local_name;
1157 const char *spec_str;
1160 local_id = relo->type_id;
1161 local_type = btf__type_by_id(local_btf, local_id);
1165 local_name = btf__name_by_offset(local_btf, local_type->name_off);
1169 spec_str = btf__name_by_offset(local_btf, relo->access_str_off);
1170 if (str_is_empty(spec_str))
1173 err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec);
1175 pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",
1176 prog_name, relo_idx, local_id, btf_kind_str(local_type),
1177 str_is_empty(local_name) ? "<anon>" : local_name,
1182 pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,
1183 relo_idx, core_relo_kind_str(relo->kind), relo->kind);
1184 bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
1185 libbpf_print(LIBBPF_DEBUG, "\n");
1187 /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */
1188 if (relo->kind == BPF_TYPE_ID_LOCAL) {
1189 targ_res.validate = true;
1190 targ_res.poison = false;
1191 targ_res.orig_val = local_spec.root_type_id;
1192 targ_res.new_val = local_spec.root_type_id;
1196 /* libbpf doesn't support candidate search for anonymous types */
1197 if (str_is_empty(spec_str)) {
1198 pr_warn("prog '%s': relo #%d: <%s> (%d) relocation doesn't support anonymous types\n",
1199 prog_name, relo_idx, core_relo_kind_str(relo->kind), relo->kind);
1204 for (i = 0, j = 0; i < cands->len; i++) {
1205 err = bpf_core_spec_match(&local_spec, cands->cands[i].btf,
1206 cands->cands[i].id, &cand_spec);
1208 pr_warn("prog '%s': relo #%d: error matching candidate #%d ",
1209 prog_name, relo_idx, i);
1210 bpf_core_dump_spec(LIBBPF_WARN, &cand_spec);
1211 libbpf_print(LIBBPF_WARN, ": %d\n", err);
1215 pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,
1216 relo_idx, err == 0 ? "non-matching" : "matching", i);
1217 bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);
1218 libbpf_print(LIBBPF_DEBUG, "\n");
1223 err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res);
1228 targ_res = cand_res;
1229 targ_spec = cand_spec;
1230 } else if (cand_spec.bit_offset != targ_spec.bit_offset) {
1231 /* if there are many field relo candidates, they
1232 * should all resolve to the same bit offset
1234 pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n",
1235 prog_name, relo_idx, cand_spec.bit_offset,
1236 targ_spec.bit_offset);
1238 } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {
1239 /* all candidates should result in the same relocation
1240 * decision and value, otherwise it's dangerous to
1241 * proceed due to ambiguity
1243 pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n",
1244 prog_name, relo_idx,
1245 cand_res.poison ? "failure" : "success", cand_res.new_val,
1246 targ_res.poison ? "failure" : "success", targ_res.new_val);
1250 cands->cands[j++] = cands->cands[i];
1254 * For BPF_FIELD_EXISTS relo or when used BPF program has field
1255 * existence checks or kernel version/config checks, it's expected
1256 * that we might not find any candidates. In this case, if field
1257 * wasn't found in any candidate, the list of candidates shouldn't
1258 * change at all, we'll just handle relocating appropriately,
1259 * depending on relo's kind.
1265 * If no candidates were found, it might be both a programmer error,
1266 * as well as expected case, depending whether instruction w/
1267 * relocation is guarded in some way that makes it unreachable (dead
1268 * code) if relocation can't be resolved. This is handled in
1269 * bpf_core_patch_insn() uniformly by replacing that instruction with
1270 * BPF helper call insn (using invalid helper ID). If that instruction
1271 * is indeed unreachable, then it will be ignored and eliminated by
1272 * verifier. If it was an error, then verifier will complain and point
1273 * to a specific instruction number in its log.
1276 pr_debug("prog '%s': relo #%d: no matching targets found\n",
1277 prog_name, relo_idx);
1279 /* calculate single target relo result explicitly */
1280 err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res);
1286 /* bpf_core_patch_insn() should know how to handle missing targ_spec */
1287 err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res);
1289 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
1290 prog_name, relo_idx, relo->insn_off / 8, err);