1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 * Copyright (c) 2016 Facebook
4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
6 #include <uapi/linux/btf.h>
7 #include <linux/bpf-cgroup.h>
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/btf.h>
13 #include <linux/bpf_verifier.h>
14 #include <linux/filter.h>
15 #include <net/netlink.h>
16 #include <linux/file.h>
17 #include <linux/vmalloc.h>
18 #include <linux/stringify.h>
19 #include <linux/bsearch.h>
20 #include <linux/sort.h>
21 #include <linux/perf_event.h>
22 #include <linux/ctype.h>
23 #include <linux/error-injection.h>
24 #include <linux/bpf_lsm.h>
25 #include <linux/btf_ids.h>
26 #include <linux/poison.h>
27 #include <linux/module.h>
28 #include <linux/cpumask.h>
33 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
34 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
35 [_id] = & _name ## _verifier_ops,
36 #define BPF_MAP_TYPE(_id, _ops)
37 #define BPF_LINK_TYPE(_id, _name)
38 #include <linux/bpf_types.h>
44 /* bpf_check() is a static code analyzer that walks eBPF program
45 * instruction by instruction and updates register/stack state.
46 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
48 * The first pass is depth-first-search to check that the program is a DAG.
49 * It rejects the following programs:
50 * - larger than BPF_MAXINSNS insns
51 * - if loop is present (detected via back-edge)
52 * - unreachable insns exist (the program shouldn't be a forest; program = one function)
53 * - out of bounds or malformed jumps
54 * The second pass is all possible path descent from the 1st insn.
55 * Since it's analyzing all paths through the program, the length of the
56 * analysis is limited to 64k insn, which may be hit even if total number of
57 * insn is less than 4K, but there are too many branches that change stack/regs.
58 * Number of 'branches to be analyzed' is limited to 1k
60 * On entry to each instruction, each register has a type, and the instruction
61 * changes the types of the registers depending on instruction semantics.
62 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then the type of R5 is copied to R1.
65 * All registers are 64-bit.
66 * R0 - return register
67 * R1-R5 argument passing registers
68 * R6-R9 callee saved registers
69 * R10 - frame pointer read-only
71 * At the start of BPF program the register R1 contains a pointer to bpf_context
72 * and has type PTR_TO_CTX.
74 * Verifier tracks arithmetic operations on pointers in case:
75 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
76 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
77 * 1st insn copies R10 (which has FRAME_PTR) type into R1
78 * and 2nd arithmetic instruction is pattern matched to recognize
79 * that it wants to construct a pointer to some element within stack.
80 * So after 2nd insn, the register R1 has type PTR_TO_STACK
81 * (and -20 constant is saved for further stack bounds checking).
82 * Meaning that this reg is a pointer to stack plus known immediate constant.
84 * Most of the time the registers have SCALAR_VALUE type, which
85 * means the register has some value, but it's not a valid pointer.
86 * (like pointer plus pointer becomes SCALAR_VALUE type)
88 * When verifier sees load or store instructions the type of base register
89 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
90 * four pointer types recognized by check_mem_access() function.
92 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
93 * and the range of [ptr, ptr + map's value_size) is accessible.
95 * registers used to pass values to function calls are checked against
96 * function argument constraints.
98 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
99 * It means that the register type passed to this function must be
100 * PTR_TO_STACK and it will be used inside the function as
101 * 'pointer to map element key'
103 * For example the argument constraints for bpf_map_lookup_elem():
104 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
105 * .arg1_type = ARG_CONST_MAP_PTR,
106 * .arg2_type = ARG_PTR_TO_MAP_KEY,
108 * ret_type says that this function returns 'pointer to map elem value or null'
109 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
110 * 2nd argument should be a pointer to stack, which will be used inside
111 * the helper function as a pointer to map element key.
113 * On the kernel side the helper function looks like:
114 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
116 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
117 * void *key = (void *) (unsigned long) r2;
120 * here kernel can access 'key' and 'map' pointers safely, knowing that
121 * [key, key + map->key_size) bytes are valid and were initialized on
122 * the stack of eBPF program.
125 * Corresponding eBPF program may look like:
126 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
127 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
128 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
129 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
130 * here verifier looks at prototype of map_lookup_elem() and sees:
131 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
132 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
134 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
135 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
136 * and were initialized prior to this call.
137 * If it's ok, then verifier allows this BPF_CALL insn and looks at
138 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
139 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
140 * returns either pointer to map value or NULL.
142 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
143 * insn, the register holding that pointer in the true branch changes state to
144 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
145 * branch. See check_cond_jmp_op().
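 * An illustrative (not verbatim) instruction sequence for such a check:
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),  // here R0 is PTR_TO_MAP_VALUE_OR_NULL
 *   BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),    // reached only when R0 != NULL,
 *                                           // so R0 is PTR_TO_MAP_VALUE here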
147 * After the call R0 is set to return type of the function and registers R1-R5
148 * are set to NOT_INIT to indicate that they are no longer readable.
150 * The following reference types represent a potential reference to a kernel
151 * resource which, after first being allocated, must be checked and freed by the BPF program:
153 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
155 * When the verifier sees a helper call return a reference type, it allocates a
156 * pointer id for the reference and stores it in the current function state.
157 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
158 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
159 * passes through a NULL-check conditional. For the branch wherein the state is
160 * changed to CONST_IMM, the verifier releases the reference.
162 * For each helper function that allocates a reference, such as
163 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
164 * bpf_sk_release(). When a reference type passes into the release function,
165 * the verifier also releases the reference. If any unchecked or unreleased
166 * reference remains at the end of the program, the verifier rejects it.
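 * An illustrative fragment in restricted C (a sketch only; 'skb' and 'tuple'
 * are hypothetical locals, the helpers are the regular BPF helper APIs):
 *
 *	struct bpf_sock *sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple),
 *						BPF_F_CURRENT_NETNS, 0);
 *	if (sk)			// NULL check: PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET
 *		bpf_sk_release(sk);	// releases the reference taken by the lookup;
 *					// returning without releasing 'sk' is rejected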
169 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
170 struct bpf_verifier_stack_elem {
171 /* verifier state is 'st'
172 * before processing instruction 'insn_idx'
173 * and after processing instruction 'prev_insn_idx'
175 struct bpf_verifier_state st;
178 struct bpf_verifier_stack_elem *next;
179 /* length of verifier log at the time this state was pushed on stack */
183 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
184 #define BPF_COMPLEXITY_LIMIT_STATES 64
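/* Descriptive note: map_key_state in bpf_insn_aux_data records whether a
 * constant map index (e.g. the bpf_tail_call() index argument) has been seen
 * at a given call site; BPF_MAP_KEY_POISON marks a non-constant or
 * conflicting index so that later fixup passes cannot specialize the call.
 * See record_func_key().
 */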
186 #define BPF_MAP_KEY_POISON (1ULL << 63)
187 #define BPF_MAP_KEY_SEEN (1ULL << 62)
189 #define BPF_MAP_PTR_UNPRIV 1UL
190 #define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + \
191 POISON_POINTER_DELTA))
192 #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
194 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
195 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
196 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
197 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env);
198 static int ref_set_non_owning(struct bpf_verifier_env *env,
199 struct bpf_reg_state *reg);
200 static void specialize_kfunc(struct bpf_verifier_env *env,
201 u32 func_id, u16 offset, unsigned long *addr);
202 static bool is_trusted_reg(const struct bpf_reg_state *reg);
204 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
206 return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
209 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
211 return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
214 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
215 const struct bpf_map *map, bool unpriv)
217 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
218 unpriv |= bpf_map_ptr_unpriv(aux);
219 aux->map_ptr_state = (unsigned long)map |
220 (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
223 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
225 return aux->map_key_state & BPF_MAP_KEY_POISON;
228 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
230 return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
233 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
235 return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
238 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
240 bool poisoned = bpf_map_key_poisoned(aux);
242 aux->map_key_state = state | BPF_MAP_KEY_SEEN |
243 (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
246 static bool bpf_helper_call(const struct bpf_insn *insn)
248 return insn->code == (BPF_JMP | BPF_CALL) &&
252 static bool bpf_pseudo_call(const struct bpf_insn *insn)
254 return insn->code == (BPF_JMP | BPF_CALL) &&
255 insn->src_reg == BPF_PSEUDO_CALL;
258 static bool bpf_pseudo_kfunc_call(const struct bpf_insn *insn)
260 return insn->code == (BPF_JMP | BPF_CALL) &&
261 insn->src_reg == BPF_PSEUDO_KFUNC_CALL;
264 struct bpf_call_arg_meta {
265 struct bpf_map *map_ptr;
282 struct btf_field *kptr_field;
285 struct bpf_kfunc_call_arg_meta {
290 const struct btf_type *func_proto;
291 const char *func_name;
304 /* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
305 * generally to pass info about user-defined local kptr types to later
 * verification logic:
 *   bpf_obj_drop - record the local kptr type to be drop'd
 *   bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type) -
 *     record the local kptr type to be refcount_incr'd and use
 *     arg_owning_ref to determine whether refcount_acquire should be fallible
319 struct btf_field *field;
322 struct btf_field *field;
325 enum bpf_dynptr_type type;
328 } initialized_dynptr;
336 struct btf *btf_vmlinux;
338 static DEFINE_MUTEX(bpf_verifier_lock);
340 static const struct bpf_line_info *
341 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
343 const struct bpf_line_info *linfo;
344 const struct bpf_prog *prog;
348 nr_linfo = prog->aux->nr_linfo;
350 if (!nr_linfo || insn_off >= prog->len)
353 linfo = prog->aux->linfo;
354 for (i = 1; i < nr_linfo; i++)
355 if (insn_off < linfo[i].insn_off)
358 return &linfo[i - 1];
361 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
363 struct bpf_verifier_env *env = private_data;
366 if (!bpf_verifier_log_needed(&env->log))
370 bpf_verifier_vlog(&env->log, fmt, args);
374 static const char *ltrim(const char *s)
382 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
384 const char *prefix_fmt, ...)
386 const struct bpf_line_info *linfo;
388 if (!bpf_verifier_log_needed(&env->log))
391 linfo = find_linfo(env, insn_off);
392 if (!linfo || linfo == env->prev_linfo)
398 va_start(args, prefix_fmt);
399 bpf_verifier_vlog(&env->log, prefix_fmt, args);
404 ltrim(btf_name_by_offset(env->prog->aux->btf,
407 env->prev_linfo = linfo;
410 static void verbose_invalid_scalar(struct bpf_verifier_env *env,
411 struct bpf_reg_state *reg,
412 struct tnum *range, const char *ctx,
413 const char *reg_name)
417 verbose(env, "At %s the register %s ", ctx, reg_name);
418 if (!tnum_is_unknown(reg->var_off)) {
419 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
420 verbose(env, "has value %s", tn_buf);
422 verbose(env, "has unknown scalar value");
424 tnum_strn(tn_buf, sizeof(tn_buf), *range);
425 verbose(env, " should have been in %s\n", tn_buf);
428 static bool type_is_pkt_pointer(enum bpf_reg_type type)
430 type = base_type(type);
431 return type == PTR_TO_PACKET ||
432 type == PTR_TO_PACKET_META;
435 static bool type_is_sk_pointer(enum bpf_reg_type type)
437 return type == PTR_TO_SOCKET ||
438 type == PTR_TO_SOCK_COMMON ||
439 type == PTR_TO_TCP_SOCK ||
440 type == PTR_TO_XDP_SOCK;
443 static bool type_may_be_null(u32 type)
445 return type & PTR_MAYBE_NULL;
448 static bool reg_not_null(const struct bpf_reg_state *reg)
450 enum bpf_reg_type type;
453 if (type_may_be_null(type))
456 type = base_type(type);
457 return type == PTR_TO_SOCKET ||
458 type == PTR_TO_TCP_SOCK ||
459 type == PTR_TO_MAP_VALUE ||
460 type == PTR_TO_MAP_KEY ||
461 type == PTR_TO_SOCK_COMMON ||
462 (type == PTR_TO_BTF_ID && is_trusted_reg(reg)) ||
466 static bool type_is_ptr_alloc_obj(u32 type)
468 return base_type(type) == PTR_TO_BTF_ID && type_flag(type) & MEM_ALLOC;
471 static bool type_is_non_owning_ref(u32 type)
473 return type_is_ptr_alloc_obj(type) && type_flag(type) & NON_OWN_REF;
476 static struct btf_record *reg_btf_record(const struct bpf_reg_state *reg)
478 struct btf_record *rec = NULL;
479 struct btf_struct_meta *meta;
481 if (reg->type == PTR_TO_MAP_VALUE) {
482 rec = reg->map_ptr->record;
483 } else if (type_is_ptr_alloc_obj(reg->type)) {
484 meta = btf_find_struct_meta(reg->btf, reg->btf_id);
491 static bool subprog_is_global(const struct bpf_verifier_env *env, int subprog)
493 struct bpf_func_info_aux *aux = env->prog->aux->func_info_aux;
495 return aux && aux[subprog].linkage == BTF_FUNC_GLOBAL;
498 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
500 return btf_record_has_field(reg_btf_record(reg), BPF_SPIN_LOCK);
503 static bool type_is_rdonly_mem(u32 type)
505 return type & MEM_RDONLY;
508 static bool is_acquire_function(enum bpf_func_id func_id,
509 const struct bpf_map *map)
511 enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
513 if (func_id == BPF_FUNC_sk_lookup_tcp ||
514 func_id == BPF_FUNC_sk_lookup_udp ||
515 func_id == BPF_FUNC_skc_lookup_tcp ||
516 func_id == BPF_FUNC_ringbuf_reserve ||
517 func_id == BPF_FUNC_kptr_xchg)
520 if (func_id == BPF_FUNC_map_lookup_elem &&
521 (map_type == BPF_MAP_TYPE_SOCKMAP ||
522 map_type == BPF_MAP_TYPE_SOCKHASH))
528 static bool is_ptr_cast_function(enum bpf_func_id func_id)
530 return func_id == BPF_FUNC_tcp_sock ||
531 func_id == BPF_FUNC_sk_fullsock ||
532 func_id == BPF_FUNC_skc_to_tcp_sock ||
533 func_id == BPF_FUNC_skc_to_tcp6_sock ||
534 func_id == BPF_FUNC_skc_to_udp6_sock ||
535 func_id == BPF_FUNC_skc_to_mptcp_sock ||
536 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
537 func_id == BPF_FUNC_skc_to_tcp_request_sock;
540 static bool is_dynptr_ref_function(enum bpf_func_id func_id)
542 return func_id == BPF_FUNC_dynptr_data;
545 static bool is_callback_calling_kfunc(u32 btf_id);
547 static bool is_callback_calling_function(enum bpf_func_id func_id)
549 return func_id == BPF_FUNC_for_each_map_elem ||
550 func_id == BPF_FUNC_timer_set_callback ||
551 func_id == BPF_FUNC_find_vma ||
552 func_id == BPF_FUNC_loop ||
553 func_id == BPF_FUNC_user_ringbuf_drain;
556 static bool is_async_callback_calling_function(enum bpf_func_id func_id)
558 return func_id == BPF_FUNC_timer_set_callback;
561 static bool is_storage_get_function(enum bpf_func_id func_id)
563 return func_id == BPF_FUNC_sk_storage_get ||
564 func_id == BPF_FUNC_inode_storage_get ||
565 func_id == BPF_FUNC_task_storage_get ||
566 func_id == BPF_FUNC_cgrp_storage_get;
569 static bool helper_multiple_ref_obj_use(enum bpf_func_id func_id,
570 const struct bpf_map *map)
572 int ref_obj_uses = 0;
574 if (is_ptr_cast_function(func_id))
576 if (is_acquire_function(func_id, map))
578 if (is_dynptr_ref_function(func_id))
581 return ref_obj_uses > 1;
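/* Illustrative note: an insn built with BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG,
 * BPF_REG_1, BPF_REG_2, 0) has class BPF_STX, mode BPF_ATOMIC and
 * imm == BPF_CMPXCHG, so the predicate below matches it.
 */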
584 static bool is_cmpxchg_insn(const struct bpf_insn *insn)
586 return BPF_CLASS(insn->code) == BPF_STX &&
587 BPF_MODE(insn->code) == BPF_ATOMIC &&
588 insn->imm == BPF_CMPXCHG;
591 /* string representation of 'enum bpf_reg_type'
593 * Note that reg_type_str() can not appear more than once in a single verbose()
 * statement, since it formats into the shared env->tmp_str_buf.
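 * For example (illustrative): a register of type PTR_TO_SOCKET | PTR_MAYBE_NULL
 * is printed as "sock_or_null", and PTR_TO_BUF | MEM_RDONLY as "rdonly_buf".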
596 static const char *reg_type_str(struct bpf_verifier_env *env,
597 enum bpf_reg_type type)
599 char postfix[16] = {0}, prefix[64] = {0};
600 static const char * const str[] = {
602 [SCALAR_VALUE] = "scalar",
603 [PTR_TO_CTX] = "ctx",
604 [CONST_PTR_TO_MAP] = "map_ptr",
605 [PTR_TO_MAP_VALUE] = "map_value",
606 [PTR_TO_STACK] = "fp",
607 [PTR_TO_PACKET] = "pkt",
608 [PTR_TO_PACKET_META] = "pkt_meta",
609 [PTR_TO_PACKET_END] = "pkt_end",
610 [PTR_TO_FLOW_KEYS] = "flow_keys",
611 [PTR_TO_SOCKET] = "sock",
612 [PTR_TO_SOCK_COMMON] = "sock_common",
613 [PTR_TO_TCP_SOCK] = "tcp_sock",
614 [PTR_TO_TP_BUFFER] = "tp_buffer",
615 [PTR_TO_XDP_SOCK] = "xdp_sock",
616 [PTR_TO_BTF_ID] = "ptr_",
617 [PTR_TO_MEM] = "mem",
618 [PTR_TO_BUF] = "buf",
619 [PTR_TO_FUNC] = "func",
620 [PTR_TO_MAP_KEY] = "map_key",
621 [CONST_PTR_TO_DYNPTR] = "dynptr_ptr",
624 if (type & PTR_MAYBE_NULL) {
625 if (base_type(type) == PTR_TO_BTF_ID)
626 strncpy(postfix, "or_null_", 16);
628 strncpy(postfix, "_or_null", 16);
631 snprintf(prefix, sizeof(prefix), "%s%s%s%s%s%s%s",
632 type & MEM_RDONLY ? "rdonly_" : "",
633 type & MEM_RINGBUF ? "ringbuf_" : "",
634 type & MEM_USER ? "user_" : "",
635 type & MEM_PERCPU ? "percpu_" : "",
636 type & MEM_RCU ? "rcu_" : "",
637 type & PTR_UNTRUSTED ? "untrusted_" : "",
638 type & PTR_TRUSTED ? "trusted_" : ""
641 snprintf(env->tmp_str_buf, TMP_STR_BUF_LEN, "%s%s%s",
642 prefix, str[base_type(type)], postfix);
643 return env->tmp_str_buf;
646 static char slot_type_char[] = {
647 [STACK_INVALID] = '?',
651 [STACK_DYNPTR] = 'd',
655 static void print_liveness(struct bpf_verifier_env *env,
656 enum bpf_reg_liveness live)
658 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
660 if (live & REG_LIVE_READ)
662 if (live & REG_LIVE_WRITTEN)
664 if (live & REG_LIVE_DONE)
668 static int __get_spi(s32 off)
670 return (-off - 1) / BPF_REG_SIZE;
673 static struct bpf_func_state *func(struct bpf_verifier_env *env,
674 const struct bpf_reg_state *reg)
676 struct bpf_verifier_state *cur = env->cur_state;
678 return cur->frame[reg->frameno];
681 static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
683 int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
685 /* We need to check that slots between [spi - nr_slots + 1, spi] are
686 * within [0, allocated_stack).
688 * Please note that the spi grows downwards. For example, a dynptr
689 * takes the size of two stack slots; the first slot will be at
690 * spi and the second slot will be at spi - 1.
692 return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
695 static int stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
696 const char *obj_kind, int nr_slots)
700 if (!tnum_is_const(reg->var_off)) {
701 verbose(env, "%s has to be at a constant offset\n", obj_kind);
705 off = reg->off + reg->var_off.value;
706 if (off % BPF_REG_SIZE) {
707 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
711 spi = __get_spi(off);
712 if (spi + 1 < nr_slots) {
713 verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off);
717 if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots))
722 static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
724 return stack_slot_obj_get_spi(env, reg, "dynptr", BPF_DYNPTR_NR_SLOTS);
727 static int iter_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
729 return stack_slot_obj_get_spi(env, reg, "iter", nr_slots);
732 static const char *btf_type_name(const struct btf *btf, u32 id)
734 return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
737 static const char *dynptr_type_str(enum bpf_dynptr_type type)
740 case BPF_DYNPTR_TYPE_LOCAL:
742 case BPF_DYNPTR_TYPE_RINGBUF:
744 case BPF_DYNPTR_TYPE_SKB:
746 case BPF_DYNPTR_TYPE_XDP:
748 case BPF_DYNPTR_TYPE_INVALID:
751 WARN_ONCE(1, "unknown dynptr type %d\n", type);
756 static const char *iter_type_str(const struct btf *btf, u32 btf_id)
758 if (!btf || btf_id == 0)
761 /* we already validated that type is valid and has conforming name */
762 return btf_type_name(btf, btf_id) + sizeof(ITER_PREFIX) - 1;
765 static const char *iter_state_str(enum bpf_iter_state state)
768 case BPF_ITER_STATE_ACTIVE:
770 case BPF_ITER_STATE_DRAINED:
772 case BPF_ITER_STATE_INVALID:
775 WARN_ONCE(1, "unknown iter state %d\n", state);
780 static void mark_reg_scratched(struct bpf_verifier_env *env, u32 regno)
782 env->scratched_regs |= 1U << regno;
785 static void mark_stack_slot_scratched(struct bpf_verifier_env *env, u32 spi)
787 env->scratched_stack_slots |= 1ULL << spi;
790 static bool reg_scratched(const struct bpf_verifier_env *env, u32 regno)
792 return (env->scratched_regs >> regno) & 1;
795 static bool stack_slot_scratched(const struct bpf_verifier_env *env, u64 regno)
797 return (env->scratched_stack_slots >> regno) & 1;
800 static bool verifier_state_scratched(const struct bpf_verifier_env *env)
802 return env->scratched_regs || env->scratched_stack_slots;
805 static void mark_verifier_state_clean(struct bpf_verifier_env *env)
807 env->scratched_regs = 0U;
808 env->scratched_stack_slots = 0ULL;
811 /* Used for printing the entire verifier state. */
812 static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
814 env->scratched_regs = ~0U;
815 env->scratched_stack_slots = ~0ULL;
818 static enum bpf_dynptr_type arg_to_dynptr_type(enum bpf_arg_type arg_type)
820 switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
821 case DYNPTR_TYPE_LOCAL:
822 return BPF_DYNPTR_TYPE_LOCAL;
823 case DYNPTR_TYPE_RINGBUF:
824 return BPF_DYNPTR_TYPE_RINGBUF;
825 case DYNPTR_TYPE_SKB:
826 return BPF_DYNPTR_TYPE_SKB;
827 case DYNPTR_TYPE_XDP:
828 return BPF_DYNPTR_TYPE_XDP;
830 return BPF_DYNPTR_TYPE_INVALID;
834 static enum bpf_type_flag get_dynptr_type_flag(enum bpf_dynptr_type type)
837 case BPF_DYNPTR_TYPE_LOCAL:
838 return DYNPTR_TYPE_LOCAL;
839 case BPF_DYNPTR_TYPE_RINGBUF:
840 return DYNPTR_TYPE_RINGBUF;
841 case BPF_DYNPTR_TYPE_SKB:
842 return DYNPTR_TYPE_SKB;
843 case BPF_DYNPTR_TYPE_XDP:
844 return DYNPTR_TYPE_XDP;
850 static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
852 return type == BPF_DYNPTR_TYPE_RINGBUF;
855 static void __mark_dynptr_reg(struct bpf_reg_state *reg,
856 enum bpf_dynptr_type type,
857 bool first_slot, int dynptr_id);
859 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
860 struct bpf_reg_state *reg);
862 static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
863 struct bpf_reg_state *sreg1,
864 struct bpf_reg_state *sreg2,
865 enum bpf_dynptr_type type)
867 int id = ++env->id_gen;
869 __mark_dynptr_reg(sreg1, type, true, id);
870 __mark_dynptr_reg(sreg2, type, false, id);
873 static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
874 struct bpf_reg_state *reg,
875 enum bpf_dynptr_type type)
877 __mark_dynptr_reg(reg, type, true, ++env->id_gen);
880 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
881 struct bpf_func_state *state, int spi);
883 static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
884 enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
886 struct bpf_func_state *state = func(env, reg);
887 enum bpf_dynptr_type type;
890 spi = dynptr_get_spi(env, reg);
894 /* We cannot assume both spi and spi - 1 belong to the same dynptr,
895 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
896 * to ensure that if, for example, slots spi and spi - 1 currently hold parts
 * of two different dynptrs d1 and d2, marking spi leads to destruction of both. In
900 * case they do belong to same dynptr, second call won't see slot_type
901 * as STACK_DYNPTR and will simply skip destruction.
903 err = destroy_if_dynptr_stack_slot(env, state, spi);
906 err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
910 for (i = 0; i < BPF_REG_SIZE; i++) {
911 state->stack[spi].slot_type[i] = STACK_DYNPTR;
912 state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
915 type = arg_to_dynptr_type(arg_type);
916 if (type == BPF_DYNPTR_TYPE_INVALID)
919 mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
920 &state->stack[spi - 1].spilled_ptr, type);
922 if (dynptr_type_refcounted(type)) {
923 /* The id is used to track proper releasing */
926 if (clone_ref_obj_id)
927 id = clone_ref_obj_id;
929 id = acquire_reference_state(env, insn_idx);
934 state->stack[spi].spilled_ptr.ref_obj_id = id;
935 state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
938 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
939 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
944 static void invalidate_dynptr(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
948 for (i = 0; i < BPF_REG_SIZE; i++) {
949 state->stack[spi].slot_type[i] = STACK_INVALID;
950 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
953 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
954 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
956 /* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
958 * While we don't allow reading STACK_INVALID, it is still possible to
959 * do <8 byte writes marking some but not all slots as STACK_MISC. Then,
960 * helpers or insns can do partial read of that part without failing,
961 * but check_stack_range_initialized, check_stack_read_var_off, and
962 * check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
963 * the slot conservatively. Hence we need to prevent those liveness
966 * This was not a problem before because STACK_INVALID is only set by
967 * default (where the default reg state has its reg->parent as NULL), or
968 * in clean_live_states after REG_LIVE_DONE (at which point
969 * mark_reg_read won't walk reg->parent chain), but not randomly during
970 * verifier state exploration (like we did above). Hence, for our case
971 * parentage chain will still be live (i.e. reg->parent may be
972 * non-NULL), while earlier reg->parent was NULL, so we need
973 * REG_LIVE_WRITTEN to screen off read marker propagation that would
 * otherwise be done later on reads or by mark_dynptr_read and would
 * unnecessarily mark registers in the verifier state.
977 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
978 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
981 static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
983 struct bpf_func_state *state = func(env, reg);
984 int spi, ref_obj_id, i;
986 spi = dynptr_get_spi(env, reg);
990 if (!dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
991 invalidate_dynptr(env, state, spi);
995 ref_obj_id = state->stack[spi].spilled_ptr.ref_obj_id;
997 /* If the dynptr has a ref_obj_id, then we need to invalidate two things:
1000 * 1) Any dynptrs with a matching ref_obj_id (clones)
1001 * 2) Any slices derived from this dynptr.
1004 /* Invalidate any slices associated with this dynptr */
1005 WARN_ON_ONCE(release_reference(env, ref_obj_id));
1007 /* Invalidate any dynptr clones */
1008 for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1009 if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id)
1012 /* it should always be the case that if the ref obj id
1013 * matches then the stack slot also belongs to a dynptr.
1016 if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
1017 verbose(env, "verifier internal error: misconfigured ref_obj_id\n");
1020 if (state->stack[i].spilled_ptr.dynptr.first_slot)
1021 invalidate_dynptr(env, state, i);
1027 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1028 struct bpf_reg_state *reg);
1030 static void mark_reg_invalid(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1032 if (!env->allow_ptr_leaks)
1033 __mark_reg_not_init(env, reg);
1035 __mark_reg_unknown(env, reg);
1038 static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
1039 struct bpf_func_state *state, int spi)
1041 struct bpf_func_state *fstate;
1042 struct bpf_reg_state *dreg;
1045 /* We always ensure that STACK_DYNPTR is never set partially,
1046 * hence just checking for slot_type[0] is enough. This is
1047 * different for STACK_SPILL, where it may be only set for
1048 * 1 byte, so code has to use is_spilled_reg.
1050 if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
1053 /* Reposition spi to first slot */
1054 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
1057 if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
1058 verbose(env, "cannot overwrite referenced dynptr\n");
1062 mark_stack_slot_scratched(env, spi);
1063 mark_stack_slot_scratched(env, spi - 1);
1065 /* Writing partially to one dynptr stack slot destroys both. */
1066 for (i = 0; i < BPF_REG_SIZE; i++) {
1067 state->stack[spi].slot_type[i] = STACK_INVALID;
1068 state->stack[spi - 1].slot_type[i] = STACK_INVALID;
1071 dynptr_id = state->stack[spi].spilled_ptr.id;
1072 /* Invalidate any slices associated with this dynptr */
1073 bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
1074 /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
1075 if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
1077 if (dreg->dynptr_id == dynptr_id)
1078 mark_reg_invalid(env, dreg);
1081 /* Do not release reference state, we are destroying dynptr on stack,
1082 * not using some helper to release it. Just reset register.
1084 __mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
1085 __mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
1087 /* Same reason as unmark_stack_slots_dynptr above */
1088 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
1089 state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
1094 static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1098 if (reg->type == CONST_PTR_TO_DYNPTR)
1101 spi = dynptr_get_spi(env, reg);
1103 /* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an
1104 * error because this just means the stack state hasn't been updated yet.
1105 * We will do check_mem_access to check and update stack bounds later.
1107 if (spi < 0 && spi != -ERANGE)
1110 /* We don't need to check if the stack slots are marked by previous
1111 * dynptr initializations because we allow overwriting existing unreferenced
1112 * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls
1113 * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are
1114 * touching are completely destructed before we reinitialize them for a new
1115 * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early
1116 * instead of delaying it until the end, where the user will get an "Unreleased reference" error.
1122 static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1124 struct bpf_func_state *state = func(env, reg);
1127 /* This already represents first slot of initialized bpf_dynptr.
1129 * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
1130 * check_func_arg_reg_off's logic, so we don't need to check its
1131 * offset and alignment.
1133 if (reg->type == CONST_PTR_TO_DYNPTR)
1136 spi = dynptr_get_spi(env, reg);
1139 if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
1142 for (i = 0; i < BPF_REG_SIZE; i++) {
1143 if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
1144 state->stack[spi - 1].slot_type[i] != STACK_DYNPTR)
1151 static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1152 enum bpf_arg_type arg_type)
1154 struct bpf_func_state *state = func(env, reg);
1155 enum bpf_dynptr_type dynptr_type;
1158 /* ARG_PTR_TO_DYNPTR takes any type of dynptr */
1159 if (arg_type == ARG_PTR_TO_DYNPTR)
1162 dynptr_type = arg_to_dynptr_type(arg_type);
1163 if (reg->type == CONST_PTR_TO_DYNPTR) {
1164 return reg->dynptr.type == dynptr_type;
1166 spi = dynptr_get_spi(env, reg);
1169 return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
1173 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
1175 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
1176 struct bpf_reg_state *reg, int insn_idx,
1177 struct btf *btf, u32 btf_id, int nr_slots)
1179 struct bpf_func_state *state = func(env, reg);
1182 spi = iter_get_spi(env, reg, nr_slots);
1186 id = acquire_reference_state(env, insn_idx);
1190 for (i = 0; i < nr_slots; i++) {
1191 struct bpf_stack_state *slot = &state->stack[spi - i];
1192 struct bpf_reg_state *st = &slot->spilled_ptr;
1194 __mark_reg_known_zero(st);
1195 st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
1196 st->live |= REG_LIVE_WRITTEN;
1197 st->ref_obj_id = i == 0 ? id : 0;
1199 st->iter.btf_id = btf_id;
1200 st->iter.state = BPF_ITER_STATE_ACTIVE;
1203 for (j = 0; j < BPF_REG_SIZE; j++)
1204 slot->slot_type[j] = STACK_ITER;
1206 mark_stack_slot_scratched(env, spi - i);
1212 static int unmark_stack_slots_iter(struct bpf_verifier_env *env,
1213 struct bpf_reg_state *reg, int nr_slots)
1215 struct bpf_func_state *state = func(env, reg);
1218 spi = iter_get_spi(env, reg, nr_slots);
1222 for (i = 0; i < nr_slots; i++) {
1223 struct bpf_stack_state *slot = &state->stack[spi - i];
1224 struct bpf_reg_state *st = &slot->spilled_ptr;
1227 WARN_ON_ONCE(release_reference(env, st->ref_obj_id));
1229 __mark_reg_not_init(env, st);
1231 /* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
1232 st->live |= REG_LIVE_WRITTEN;
1234 for (j = 0; j < BPF_REG_SIZE; j++)
1235 slot->slot_type[j] = STACK_INVALID;
1237 mark_stack_slot_scratched(env, spi - i);
1243 static bool is_iter_reg_valid_uninit(struct bpf_verifier_env *env,
1244 struct bpf_reg_state *reg, int nr_slots)
1246 struct bpf_func_state *state = func(env, reg);
1249 /* For -ERANGE (i.e. spi not falling into allocated stack slots), we
1250 * will do check_mem_access to check and update stack bounds later, so
1251 * return true for that case.
1253 spi = iter_get_spi(env, reg, nr_slots);
1259 for (i = 0; i < nr_slots; i++) {
1260 struct bpf_stack_state *slot = &state->stack[spi - i];
1262 for (j = 0; j < BPF_REG_SIZE; j++)
1263 if (slot->slot_type[j] == STACK_ITER)
1270 static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
1271 struct btf *btf, u32 btf_id, int nr_slots)
1273 struct bpf_func_state *state = func(env, reg);
1276 spi = iter_get_spi(env, reg, nr_slots);
1280 for (i = 0; i < nr_slots; i++) {
1281 struct bpf_stack_state *slot = &state->stack[spi - i];
1282 struct bpf_reg_state *st = &slot->spilled_ptr;
1284 /* only main (first) slot has ref_obj_id set */
1285 if (i == 0 && !st->ref_obj_id)
1287 if (i != 0 && st->ref_obj_id)
1289 if (st->iter.btf != btf || st->iter.btf_id != btf_id)
1292 for (j = 0; j < BPF_REG_SIZE; j++)
1293 if (slot->slot_type[j] != STACK_ITER)
1300 /* Check if given stack slot is "special":
1301 * - spilled register state (STACK_SPILL);
1302 * - dynptr state (STACK_DYNPTR);
1303 * - iter state (STACK_ITER).
1305 static bool is_stack_slot_special(const struct bpf_stack_state *stack)
1307 enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
1319 WARN_ONCE(1, "unknown stack slot type %d\n", type);
1324 /* The reg state of a pointer or a bounded scalar was saved when
1325 * it was spilled to the stack.
1327 static bool is_spilled_reg(const struct bpf_stack_state *stack)
1329 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
1332 static bool is_spilled_scalar_reg(const struct bpf_stack_state *stack)
1334 return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL &&
1335 stack->spilled_ptr.type == SCALAR_VALUE;
1338 static void scrub_spilled_slot(u8 *stype)
1340 if (*stype != STACK_INVALID)
1341 *stype = STACK_MISC;
1344 static void print_verifier_state(struct bpf_verifier_env *env,
1345 const struct bpf_func_state *state,
1348 const struct bpf_reg_state *reg;
1349 enum bpf_reg_type t;
1353 verbose(env, " frame%d:", state->frameno);
1354 for (i = 0; i < MAX_BPF_REG; i++) {
1355 reg = &state->regs[i];
1359 if (!print_all && !reg_scratched(env, i))
1361 verbose(env, " R%d", i);
1362 print_liveness(env, reg->live);
1364 if (t == SCALAR_VALUE && reg->precise)
1366 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
1367 tnum_is_const(reg->var_off)) {
1368 /* reg->off should be 0 for SCALAR_VALUE */
1369 verbose(env, "%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
1370 verbose(env, "%lld", reg->var_off.value + reg->off);
1372 const char *sep = "";
1374 verbose(env, "%s", reg_type_str(env, t));
1375 if (base_type(t) == PTR_TO_BTF_ID)
1376 verbose(env, "%s", btf_type_name(reg->btf, reg->btf_id));
1379 * _a stands for append, was shortened to avoid multiline statements below.
1380 * This macro is used to output a comma separated list of attributes.
1382 #define verbose_a(fmt, ...) ({ verbose(env, "%s" fmt, sep, __VA_ARGS__); sep = ","; })
1385 verbose_a("id=%d", reg->id);
1386 if (reg->ref_obj_id)
1387 verbose_a("ref_obj_id=%d", reg->ref_obj_id);
1388 if (type_is_non_owning_ref(reg->type))
1389 verbose_a("%s", "non_own_ref");
1390 if (t != SCALAR_VALUE)
1391 verbose_a("off=%d", reg->off);
1392 if (type_is_pkt_pointer(t))
1393 verbose_a("r=%d", reg->range);
1394 else if (base_type(t) == CONST_PTR_TO_MAP ||
1395 base_type(t) == PTR_TO_MAP_KEY ||
1396 base_type(t) == PTR_TO_MAP_VALUE)
1397 verbose_a("ks=%d,vs=%d",
1398 reg->map_ptr->key_size,
1399 reg->map_ptr->value_size);
1400 if (tnum_is_const(reg->var_off)) {
1401 /* Typically an immediate SCALAR_VALUE, but
1402 * could be a pointer whose offset is too big for the reg->off field.
1405 verbose_a("imm=%llx", reg->var_off.value);
1407 if (reg->smin_value != reg->umin_value &&
1408 reg->smin_value != S64_MIN)
1409 verbose_a("smin=%lld", (long long)reg->smin_value);
1410 if (reg->smax_value != reg->umax_value &&
1411 reg->smax_value != S64_MAX)
1412 verbose_a("smax=%lld", (long long)reg->smax_value);
1413 if (reg->umin_value != 0)
1414 verbose_a("umin=%llu", (unsigned long long)reg->umin_value);
1415 if (reg->umax_value != U64_MAX)
1416 verbose_a("umax=%llu", (unsigned long long)reg->umax_value);
1417 if (!tnum_is_unknown(reg->var_off)) {
1420 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
1421 verbose_a("var_off=%s", tn_buf);
1423 if (reg->s32_min_value != reg->smin_value &&
1424 reg->s32_min_value != S32_MIN)
1425 verbose_a("s32_min=%d", (int)(reg->s32_min_value));
1426 if (reg->s32_max_value != reg->smax_value &&
1427 reg->s32_max_value != S32_MAX)
1428 verbose_a("s32_max=%d", (int)(reg->s32_max_value));
1429 if (reg->u32_min_value != reg->umin_value &&
1430 reg->u32_min_value != U32_MIN)
1431 verbose_a("u32_min=%d", (int)(reg->u32_min_value));
1432 if (reg->u32_max_value != reg->umax_value &&
1433 reg->u32_max_value != U32_MAX)
1434 verbose_a("u32_max=%d", (int)(reg->u32_max_value));
1441 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
1442 char types_buf[BPF_REG_SIZE + 1];
1446 for (j = 0; j < BPF_REG_SIZE; j++) {
1447 if (state->stack[i].slot_type[j] != STACK_INVALID)
1449 types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1451 types_buf[BPF_REG_SIZE] = 0;
1454 if (!print_all && !stack_slot_scratched(env, i))
1456 switch (state->stack[i].slot_type[BPF_REG_SIZE - 1]) {
1458 reg = &state->stack[i].spilled_ptr;
1461 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1462 print_liveness(env, reg->live);
1463 verbose(env, "=%s", t == SCALAR_VALUE ? "" : reg_type_str(env, t));
1464 if (t == SCALAR_VALUE && reg->precise)
1466 if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
1467 verbose(env, "%lld", reg->var_off.value + reg->off);
1470 i += BPF_DYNPTR_NR_SLOTS - 1;
1471 reg = &state->stack[i].spilled_ptr;
1473 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1474 print_liveness(env, reg->live);
1475 verbose(env, "=dynptr_%s", dynptr_type_str(reg->dynptr.type));
1476 if (reg->ref_obj_id)
1477 verbose(env, "(ref_id=%d)", reg->ref_obj_id);
1480 /* only main slot has ref_obj_id set; skip others */
1481 reg = &state->stack[i].spilled_ptr;
1482 if (!reg->ref_obj_id)
1485 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1486 print_liveness(env, reg->live);
1487 verbose(env, "=iter_%s(ref_id=%d,state=%s,depth=%u)",
1488 iter_type_str(reg->iter.btf, reg->iter.btf_id),
1489 reg->ref_obj_id, iter_state_str(reg->iter.state),
1495 reg = &state->stack[i].spilled_ptr;
1497 for (j = 0; j < BPF_REG_SIZE; j++)
1498 types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
1499 types_buf[BPF_REG_SIZE] = 0;
1501 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
1502 print_liveness(env, reg->live);
1503 verbose(env, "=%s", types_buf);
1507 if (state->acquired_refs && state->refs[0].id) {
1508 verbose(env, " refs=%d", state->refs[0].id);
1509 for (i = 1; i < state->acquired_refs; i++)
1510 if (state->refs[i].id)
1511 verbose(env, ",%d", state->refs[i].id);
1513 if (state->in_callback_fn)
1514 verbose(env, " cb");
1515 if (state->in_async_callback_fn)
1516 verbose(env, " async_cb");
1519 mark_verifier_state_clean(env);
1522 static inline u32 vlog_alignment(u32 pos)
1524 return round_up(max(pos + BPF_LOG_MIN_ALIGNMENT / 2, BPF_LOG_ALIGNMENT),
1525 BPF_LOG_MIN_ALIGNMENT) - pos - 1;
1528 static void print_insn_state(struct bpf_verifier_env *env,
1529 const struct bpf_func_state *state)
1531 if (env->prev_log_pos && env->prev_log_pos == env->log.end_pos) {
1532 /* remove new line character */
1533 bpf_vlog_reset(&env->log, env->prev_log_pos - 1);
1534 verbose(env, "%*c;", vlog_alignment(env->prev_insn_print_pos), ' ');
1536 verbose(env, "%d:", env->insn_idx);
1538 print_verifier_state(env, state, false);
1541 /* copy array src of length n * size bytes to dst. dst is reallocated if it's too
1542 * small to hold src. This is different from krealloc since we don't want to preserve
1543 * the contents of dst.
1545 * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could not be allocated.
1548 static void *copy_array(void *dst, const void *src, size_t n, size_t size, gfp_t flags)
1554 if (ZERO_OR_NULL_PTR(src))
1557 if (unlikely(check_mul_overflow(n, size, &bytes)))
1560 alloc_bytes = max(ksize(orig), kmalloc_size_roundup(bytes));
1561 dst = krealloc(orig, alloc_bytes, flags);
1567 memcpy(dst, src, bytes);
1569 return dst ? dst : ZERO_SIZE_PTR;
1572 /* resize an array from old_n items to new_n items. the array is reallocated if it's too
1573 * small to hold new_n items. new items are zeroed out if the array grows.
1575 * Contrary to krealloc_array, does not free arr if new_n is zero.
1577 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
1582 if (!new_n || old_n == new_n)
1585 alloc_size = kmalloc_size_roundup(size_mul(new_n, size));
1586 new_arr = krealloc(arr, alloc_size, GFP_KERNEL);
1594 memset(arr + old_n * size, 0, (new_n - old_n) * size);
1597 return arr ? arr : ZERO_SIZE_PTR;
1600 static int copy_reference_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1602 dst->refs = copy_array(dst->refs, src->refs, src->acquired_refs,
1603 sizeof(struct bpf_reference_state), GFP_KERNEL);
1607 dst->acquired_refs = src->acquired_refs;
1611 static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
1613 size_t n = src->allocated_stack / BPF_REG_SIZE;
1615 dst->stack = copy_array(dst->stack, src->stack, n, sizeof(struct bpf_stack_state),
1620 dst->allocated_stack = src->allocated_stack;
1624 static int resize_reference_state(struct bpf_func_state *state, size_t n)
1626 state->refs = realloc_array(state->refs, state->acquired_refs, n,
1627 sizeof(struct bpf_reference_state));
1631 state->acquired_refs = n;
1635 /* Possibly update state->allocated_stack to be at least size bytes. Also
1636 * possibly update the function's high-water mark in its bpf_subprog_info.
1638 static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
1640 size_t old_n = state->allocated_stack / BPF_REG_SIZE, n = size / BPF_REG_SIZE;
1645 state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
1649 state->allocated_stack = size;
1651 /* update known max for given subprogram */
1652 if (env->subprog_info[state->subprogno].stack_depth < size)
1653 env->subprog_info[state->subprogno].stack_depth = size;
1658 /* Acquire a pointer id from the env and update the state->refs to include
1659 * this new pointer reference.
1660 * On success, returns a valid pointer id to associate with the register
1661 * On failure, returns a negative errno.
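 * For example (descriptive note): when check_helper_call() processes an
 * acquiring helper such as bpf_sk_lookup_tcp(), it calls this function and
 * stores the returned id in R0's ref_obj_id so that the pointer can later be
 * matched against bpf_sk_release().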
1663 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
1665 struct bpf_func_state *state = cur_func(env);
1666 int new_ofs = state->acquired_refs;
1669 err = resize_reference_state(state, state->acquired_refs + 1);
1673 state->refs[new_ofs].id = id;
1674 state->refs[new_ofs].insn_idx = insn_idx;
1675 state->refs[new_ofs].callback_ref = state->in_callback_fn ? state->frameno : 0;
1680 /* release function corresponding to acquire_reference_state(). Idempotent. */
1681 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
1685 last_idx = state->acquired_refs - 1;
1686 for (i = 0; i < state->acquired_refs; i++) {
1687 if (state->refs[i].id == ptr_id) {
1688 /* Cannot release caller references in callbacks */
1689 if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
1691 if (last_idx && i != last_idx)
1692 memcpy(&state->refs[i], &state->refs[last_idx],
1693 sizeof(*state->refs));
1694 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
1695 state->acquired_refs--;
1702 static void free_func_state(struct bpf_func_state *state)
1707 kfree(state->stack);
1711 static void clear_jmp_history(struct bpf_verifier_state *state)
1713 kfree(state->jmp_history);
1714 state->jmp_history = NULL;
1715 state->jmp_history_cnt = 0;
1718 static void free_verifier_state(struct bpf_verifier_state *state,
1723 for (i = 0; i <= state->curframe; i++) {
1724 free_func_state(state->frame[i]);
1725 state->frame[i] = NULL;
1727 clear_jmp_history(state);
1732 /* copy verifier state from src to dst growing dst stack space
1733 * when necessary to accommodate larger src stack
1735 static int copy_func_state(struct bpf_func_state *dst,
1736 const struct bpf_func_state *src)
1740 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
1741 err = copy_reference_state(dst, src);
1744 return copy_stack_state(dst, src);
1747 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
1748 const struct bpf_verifier_state *src)
1750 struct bpf_func_state *dst;
1753 dst_state->jmp_history = copy_array(dst_state->jmp_history, src->jmp_history,
1754 src->jmp_history_cnt, sizeof(struct bpf_idx_pair),
1756 if (!dst_state->jmp_history)
1758 dst_state->jmp_history_cnt = src->jmp_history_cnt;
1760 /* if dst has more stack frames than src, free them */
1761 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
1762 free_func_state(dst_state->frame[i]);
1763 dst_state->frame[i] = NULL;
1765 dst_state->speculative = src->speculative;
1766 dst_state->active_rcu_lock = src->active_rcu_lock;
1767 dst_state->curframe = src->curframe;
1768 dst_state->active_lock.ptr = src->active_lock.ptr;
1769 dst_state->active_lock.id = src->active_lock.id;
1770 dst_state->branches = src->branches;
1771 dst_state->parent = src->parent;
1772 dst_state->first_insn_idx = src->first_insn_idx;
1773 dst_state->last_insn_idx = src->last_insn_idx;
1774 dst_state->dfs_depth = src->dfs_depth;
1775 dst_state->used_as_loop_entry = src->used_as_loop_entry;
1776 for (i = 0; i <= src->curframe; i++) {
1777 dst = dst_state->frame[i];
1779 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
1782 dst_state->frame[i] = dst;
1784 err = copy_func_state(dst, src->frame[i]);
1791 static u32 state_htab_size(struct bpf_verifier_env *env)
1793 return env->prog->len;
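/* Descriptive note: explored states live in a hash table keyed by the
 * instruction index mixed with the current frame's call site, so states for
 * the same insn reached through different call chains land in different
 * buckets.
 */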
1796 static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
1798 struct bpf_verifier_state *cur = env->cur_state;
1799 struct bpf_func_state *state = cur->frame[cur->curframe];
1801 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
1804 static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
1808 if (a->curframe != b->curframe)
1811 for (fr = a->curframe; fr >= 0; fr--)
1812 if (a->frame[fr]->callsite != b->frame[fr]->callsite)
1818 /* Open coded iterators allow back-edges in the state graph in order to
1819 * check unbounded loops that use iterators.
1821 * In is_state_visited() it is necessary to know if explored states are
1822 * part of some loops in order to decide whether non-exact states
1823 * comparison could be used:
1824 * - non-exact states comparison establishes sub-state relation and uses
1825 * read and precision marks to do so, these marks are propagated from
1826 * children states and thus are not guaranteed to be final in a loop;
1827 * - exact states comparison just checks if current and explored states
1828 * are identical (and thus form a back-edge).
1830 * Paper "A New Algorithm for Identifying Loops in Decompilation"
1831 * by Tao Wei, Jian Mao, Wei Zou and Yu Chen [1] presents a convenient
1832 * algorithm for loop structure detection and gives an overview of
1833 * relevant terminology. It also has helpful illustrations.
1835 * [1] https://api.semanticscholar.org/CorpusID:15784067
1837 * We use a similar algorithm, but because loop nesting structure is
 * irrelevant for the verifier ours is significantly simpler and resembles
 * the strongly connected components algorithm from Sedgewick's textbook.
1841 * Define topmost loop entry as a first node of the loop traversed in a
1842 * depth first search starting from initial state. The goal of the loop
1843 * tracking algorithm is to associate topmost loop entries with states
1844 * derived from these entries.
1846 * For each step in the DFS states traversal the algorithm needs to identify
 * the following situations:
 *
 * (A) the successor state of cur has not been traversed yet;
 * (B) the successor state of cur, or its loop entry, is in the current DFS
 *     path, thus cur and succ are members of the same outermost loop;
 * (C) the successor state of cur is a part of some loop, but this loop does
 *     not include cur, or the successor state is not in a loop at all.
1894 * The algorithm could be described by the following Python code:
1896 * traversed = set() # Set of traversed nodes
1897 * entries = {} # Mapping from node to loop entry
1898 * depths = {} # Depth level assigned to graph node
1899 * path = set() # Current DFS path
1901 * # Find outermost loop entry known for n
1902 * def get_loop_entry(n):
1903 * h = entries.get(n, None)
1904 * while h in entries and entries[h] != h:
1908 * # Update n's loop entry if h's outermost entry comes
1909 * # before n's outermost entry in current DFS path.
1910 * def update_loop_entry(n, h):
1911 * n1 = get_loop_entry(n) or n
1912 * h1 = get_loop_entry(h) or h
1913 * if h1 in path and depths[h1] <= depths[n1]:
1916 * def dfs(n, depth):
1920 * for succ in G.successors(n):
1921 * if succ not in traversed:
1922 * # Case A: explore succ and update cur's loop entry
1923 * # only if succ's entry is in current DFS path.
1924 * dfs(succ, depth + 1)
1925 * h = get_loop_entry(succ)
1926 * update_loop_entry(n, h)
1928 * # Case B or C depending on `h1 in path` check in update_loop_entry().
1929 * update_loop_entry(n, succ)
1932 * To adapt this algorithm for use with verifier:
1933 * - use st->branch == 0 as a signal that DFS of succ had been finished
1934 * and cur's loop entry has to be updated (case A), handle this in
1935 * update_branch_counts();
1936 * - use st->branch > 0 as a signal that st is in the current DFS path;
1937 * - handle cases B and C in is_state_visited();
1938 * - update topmost loop entry for intermediate states in get_loop_entry().
1940 static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
1942 struct bpf_verifier_state *topmost = st->loop_entry, *old;
1944 while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
1945 topmost = topmost->loop_entry;
1946 /* Update loop entries for intermediate states to avoid this
1947 * traversal in future get_loop_entry() calls.
1949 while (st && st->loop_entry != topmost) {
1950 old = st->loop_entry;
1951 st->loop_entry = topmost;
1957 static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
1959 struct bpf_verifier_state *cur1, *hdr1;
1961 cur1 = get_loop_entry(cur) ?: cur;
1962 hdr1 = get_loop_entry(hdr) ?: hdr;
1963 * The hdr1->branches check decides between cases B and C in the
 * comment for get_loop_entry(). If hdr1->branches == 0 then
 * hdr's topmost loop entry is not in the current DFS path,
1966 * hence 'cur' and 'hdr' are not in the same loop and there is
1967 * no need to update cur->loop_entry.
1969 if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
1970 cur->loop_entry = hdr;
1971 hdr->used_as_loop_entry = true;
1975 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1978 u32 br = --st->branches;
1980 /* br == 0 signals that DFS exploration for 'st' is finished,
1981 * thus it is necessary to update parent's loop entry if it
1982 * turned out that st is a part of some loop.
1983 * This is a part of 'case A' in get_loop_entry() comment.
1985 if (br == 0 && st->parent && st->loop_entry)
1986 update_loop_entry(st->parent, st->loop_entry);
1988 /* WARN_ON(br > 1) technically makes sense here,
1989 * but see comment in push_stack(), hence:
1991 WARN_ONCE((int)br < 0,
1992 "BUG update_branch_counts:branches_to_explore=%d\n",
2000 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
2001 int *insn_idx, bool pop_log)
2003 struct bpf_verifier_state *cur = env->cur_state;
2004 struct bpf_verifier_stack_elem *elem, *head = env->head;
2007 if (env->head == NULL)
2011 err = copy_verifier_state(cur, &head->st);
2016 bpf_vlog_reset(&env->log, head->log_pos);
2018 *insn_idx = head->insn_idx;
2020 *prev_insn_idx = head->prev_insn_idx;
2022 free_verifier_state(&head->st, false);
2029 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
2030 int insn_idx, int prev_insn_idx,
2033 struct bpf_verifier_state *cur = env->cur_state;
2034 struct bpf_verifier_stack_elem *elem;
2037 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2041 elem->insn_idx = insn_idx;
2042 elem->prev_insn_idx = prev_insn_idx;
2043 elem->next = env->head;
2044 elem->log_pos = env->log.end_pos;
2047 err = copy_verifier_state(&elem->st, cur);
2050 elem->st.speculative |= speculative;
2051 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2052 verbose(env, "The sequence of %d jumps is too complex.\n",
2056 if (elem->st.parent) {
2057 ++elem->st.parent->branches;
2058 /* WARN_ON(branches > 2) technically makes sense here,
2060 * 1. speculative states will bump 'branches' for non-branch
2062 * 2. is_state_visited() heuristics may decide not to create
2063 * a new state for a sequence of branches and all such current
2064 * and cloned states will be pointing to a single parent state
2065 * which might have large 'branches' count.
2070 free_verifier_state(env->cur_state, true);
2071 env->cur_state = NULL;
2072 /* pop all elements and return */
2073 while (!pop_stack(env, NULL, NULL, false));
2077 #define CALLER_SAVED_REGS 6
2078 static const int caller_saved[CALLER_SAVED_REGS] = {
2079 BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
2082 /* This helper doesn't clear reg->id */
2083 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
2085 reg->var_off = tnum_const(imm);
2086 reg->smin_value = (s64)imm;
2087 reg->smax_value = (s64)imm;
2088 reg->umin_value = imm;
2089 reg->umax_value = imm;
2091 reg->s32_min_value = (s32)imm;
2092 reg->s32_max_value = (s32)imm;
2093 reg->u32_min_value = (u32)imm;
2094 reg->u32_max_value = (u32)imm;
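/* For illustration (values follow directly from the assignments above):
 * ___mark_reg_known(reg, 5) yields var_off = (value=0x5, mask=0x0),
 * smin = smax = umin = umax = 5 and the matching 32-bit bounds, i.e. the
 * register is known to hold exactly the constant 5.
 */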
2097 /* Mark the unknown part of a register (variable offset or scalar value) as
2098 * known to have the value @imm.
2100 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
2102 /* Clear off and union(map_ptr, range) */
2103 memset(((u8 *)reg) + sizeof(reg->type), 0,
2104 offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
2106 reg->ref_obj_id = 0;
2107 ___mark_reg_known(reg, imm);
2110 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
2112 reg->var_off = tnum_const_subreg(reg->var_off, imm);
2113 reg->s32_min_value = (s32)imm;
2114 reg->s32_max_value = (s32)imm;
2115 reg->u32_min_value = (u32)imm;
2116 reg->u32_max_value = (u32)imm;
2119 /* Mark the 'variable offset' part of a register as zero. This should be
2120 * used only on registers holding a pointer type.
2122 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
2124 __mark_reg_known(reg, 0);
2127 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
2129 __mark_reg_known(reg, 0);
2130 reg->type = SCALAR_VALUE;
2133 static void mark_reg_known_zero(struct bpf_verifier_env *env,
2134 struct bpf_reg_state *regs, u32 regno)
2136 if (WARN_ON(regno >= MAX_BPF_REG)) {
2137 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
2138 /* Something bad happened, let's kill all regs */
2139 for (regno = 0; regno < MAX_BPF_REG; regno++)
2140 __mark_reg_not_init(env, regs + regno);
2143 __mark_reg_known_zero(regs + regno);
2146 static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
2147 bool first_slot, int dynptr_id)
2149 /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
2150 * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
2151 * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
2153 __mark_reg_known_zero(reg);
2154 reg->type = CONST_PTR_TO_DYNPTR;
2155 /* Give each dynptr a unique id to uniquely associate slices to it. */
2156 reg->id = dynptr_id;
2157 reg->dynptr.type = type;
2158 reg->dynptr.first_slot = first_slot;
2161 static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
2163 if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
2164 const struct bpf_map *map = reg->map_ptr;
2166 if (map->inner_map_meta) {
2167 reg->type = CONST_PTR_TO_MAP;
2168 reg->map_ptr = map->inner_map_meta;
2169 /* transfer reg's id which is unique for every map_lookup_elem
2170 * as UID of the inner map.
2172 if (btf_record_has_field(map->inner_map_meta->record, BPF_TIMER))
2173 reg->map_uid = reg->id;
2174 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
2175 reg->type = PTR_TO_XDP_SOCK;
2176 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
2177 map->map_type == BPF_MAP_TYPE_SOCKHASH) {
2178 reg->type = PTR_TO_SOCKET;
2180 reg->type = PTR_TO_MAP_VALUE;
2185 reg->type &= ~PTR_MAYBE_NULL;
2188 static void mark_reg_graph_node(struct bpf_reg_state *regs, u32 regno,
2189 struct btf_field_graph_root *ds_head)
2191 __mark_reg_known_zero(&regs[regno]);
2192 regs[regno].type = PTR_TO_BTF_ID | MEM_ALLOC;
2193 regs[regno].btf = ds_head->btf;
2194 regs[regno].btf_id = ds_head->value_btf_id;
2195 regs[regno].off = ds_head->node_offset;
2198 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
2200 return type_is_pkt_pointer(reg->type);
2203 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
2205 return reg_is_pkt_pointer(reg) ||
2206 reg->type == PTR_TO_PACKET_END;
2209 static bool reg_is_dynptr_slice_pkt(const struct bpf_reg_state *reg)
2211 return base_type(reg->type) == PTR_TO_MEM &&
2212 (reg->type & DYNPTR_TYPE_SKB || reg->type & DYNPTR_TYPE_XDP);
2215 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
2216 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
2217 enum bpf_reg_type which)
2219 /* The register can already have a range from prior markings.
2220 * This is fine as long as it hasn't been advanced from its
2223 return reg->type == which &&
2226 tnum_equals_const(reg->var_off, 0);
2229 /* Reset the min/max bounds of a register */
2230 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
2232 reg->smin_value = S64_MIN;
2233 reg->smax_value = S64_MAX;
2234 reg->umin_value = 0;
2235 reg->umax_value = U64_MAX;
2237 reg->s32_min_value = S32_MIN;
2238 reg->s32_max_value = S32_MAX;
2239 reg->u32_min_value = 0;
2240 reg->u32_max_value = U32_MAX;
2243 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
2245 reg->smin_value = S64_MIN;
2246 reg->smax_value = S64_MAX;
2247 reg->umin_value = 0;
2248 reg->umax_value = U64_MAX;
2251 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
2253 reg->s32_min_value = S32_MIN;
2254 reg->s32_max_value = S32_MAX;
2255 reg->u32_min_value = 0;
2256 reg->u32_max_value = U32_MAX;
2259 static void __update_reg32_bounds(struct bpf_reg_state *reg)
2261 struct tnum var32_off = tnum_subreg(reg->var_off);
2263 /* min signed is max(sign bit) | min(other bits) */
2264 reg->s32_min_value = max_t(s32, reg->s32_min_value,
2265 var32_off.value | (var32_off.mask & S32_MIN));
2266 /* max signed is min(sign bit) | max(other bits) */
2267 reg->s32_max_value = min_t(s32, reg->s32_max_value,
2268 var32_off.value | (var32_off.mask & S32_MAX));
2269 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
2270 reg->u32_max_value = min(reg->u32_max_value,
2271 (u32)(var32_off.value | var32_off.mask));
2274 static void __update_reg64_bounds(struct bpf_reg_state *reg)
2276 /* min signed is max(sign bit) | min(other bits) */
2277 reg->smin_value = max_t(s64, reg->smin_value,
2278 reg->var_off.value | (reg->var_off.mask & S64_MIN));
2279 /* max signed is min(sign bit) | max(other bits) */
2280 reg->smax_value = min_t(s64, reg->smax_value,
2281 reg->var_off.value | (reg->var_off.mask & S64_MAX));
2282 reg->umin_value = max(reg->umin_value, reg->var_off.value);
2283 reg->umax_value = min(reg->umax_value,
2284 reg->var_off.value | reg->var_off.mask);
2287 static void __update_reg_bounds(struct bpf_reg_state *reg)
2289 __update_reg32_bounds(reg);
2290 __update_reg64_bounds(reg);
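/* Worked example (illustrative only): suppose reg->var_off = (value=0x0,
 * mask=0x3), i.e. only the two low bits are unknown, and the previous
 * unsigned bounds were [0, 100]. __update_reg64_bounds() then computes
 *   smin = max(smin, 0x0 | (0x3 & S64_MIN)) = max(smin, 0)
 *   smax = min(smax, 0x0 | (0x3 & S64_MAX)) = min(smax, 3)
 *   umin = max(0, 0x0)                      = 0
 *   umax = min(100, 0x0 | 0x3)              = 3
 * so the known bits of var_off tighten the range to [0, 3].
 */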
2293 /* Uses signed min/max values to inform unsigned, and vice-versa */
2294 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
2296 /* Learn sign from signed bounds.
2297 * If we cannot cross the sign boundary, then signed and unsigned bounds
2298 * are the same, so combine. This works even in the negative case, e.g.
2299 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2301 if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
2302 reg->s32_min_value = reg->u32_min_value =
2303 max_t(u32, reg->s32_min_value, reg->u32_min_value);
2304 reg->s32_max_value = reg->u32_max_value =
2305 min_t(u32, reg->s32_max_value, reg->u32_max_value);
2308 /* Learn sign from unsigned bounds. Signed bounds cross the sign
2309 * boundary, so we must be careful.
2311 if ((s32)reg->u32_max_value >= 0) {
2312 /* Positive. We can't learn anything from the smin, but smax
2313 * is positive, hence safe.
2315 reg->s32_min_value = reg->u32_min_value;
2316 reg->s32_max_value = reg->u32_max_value =
2317 min_t(u32, reg->s32_max_value, reg->u32_max_value);
2318 } else if ((s32)reg->u32_min_value < 0) {
2319 /* Negative. We can't learn anything from the smax, but smin
2320 * is negative, hence safe.
2322 reg->s32_min_value = reg->u32_min_value =
2323 max_t(u32, reg->s32_min_value, reg->u32_min_value);
2324 reg->s32_max_value = reg->u32_max_value;
2328 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
2330 /* Learn sign from signed bounds.
2331 * If we cannot cross the sign boundary, then signed and unsigned bounds
2332 * are the same, so combine. This works even in the negative case, e.g.
2333 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
2335 if (reg->smin_value >= 0 || reg->smax_value < 0) {
2336 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
2338 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
2342 /* Learn sign from unsigned bounds. Signed bounds cross the sign
2343 * boundary, so we must be careful.
2345 if ((s64)reg->umax_value >= 0) {
2346 /* Positive. We can't learn anything from the smin, but smax
2347 * is positive, hence safe.
2349 reg->smin_value = reg->umin_value;
2350 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
2352 } else if ((s64)reg->umin_value < 0) {
2353 /* Negative. We can't learn anything from the smax, but smin
2354 * is negative, hence safe.
2356 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
2358 reg->smax_value = reg->umax_value;
2362 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
2364 __reg32_deduce_bounds(reg);
2365 __reg64_deduce_bounds(reg);
2368 /* Attempts to improve var_off based on unsigned min/max information */
2369 static void __reg_bound_offset(struct bpf_reg_state *reg)
2371 struct tnum var64_off = tnum_intersect(reg->var_off,
2372 tnum_range(reg->umin_value,
2374 struct tnum var32_off = tnum_intersect(tnum_subreg(var64_off),
2375 tnum_range(reg->u32_min_value,
2376 reg->u32_max_value));
2378 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
2381 static void reg_bounds_sync(struct bpf_reg_state *reg)
2383 /* We might have learned new bounds from the var_off. */
2384 __update_reg_bounds(reg);
2385 /* We might have learned something about the sign bit. */
2386 __reg_deduce_bounds(reg);
2387 /* We might have learned some bits from the bounds. */
2388 __reg_bound_offset(reg);
2389 /* Intersecting with the old var_off might have improved our bounds
2390 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
2391 * then new var_off is (0; 0x7f...fc) which improves our umax.
2393 __update_reg_bounds(reg);
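/* Illustrative example of the var_off <-> range interplay used above: with
 * umin_value = 0 and umax_value = 7, tnum_range(0, 7) is the tnum
 * (value=0x0, mask=0x7), and tnum_intersect() with the current var_off keeps
 * only the uncertainty both representations agree on. The reg_bounds_sync()
 * comment above shows the reverse direction, where an improved var_off feeds
 * back into umax via the second __update_reg_bounds() call.
 */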
2396 static bool __reg32_bound_s64(s32 a)
2398 return a >= 0 && a <= S32_MAX;
2401 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
2403 reg->umin_value = reg->u32_min_value;
2404 reg->umax_value = reg->u32_max_value;
2406 /* Attempt to pull 32-bit signed bounds into 64-bit bounds, but they must
2407 * be non-negative; otherwise set to worst-case bounds and refine later
2410 if (__reg32_bound_s64(reg->s32_min_value) &&
2411 __reg32_bound_s64(reg->s32_max_value)) {
2412 reg->smin_value = reg->s32_min_value;
2413 reg->smax_value = reg->s32_max_value;
2415 reg->smin_value = 0;
2416 reg->smax_value = U32_MAX;
2420 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
2422 /* Special case when a 64-bit register has its upper 32 bits
2423 * zeroed. Typically happens after a zext or <<32, >>32 sequence,
2424 * allowing us to use the 32-bit bounds directly.
2426 if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
2427 __reg_assign_32_into_64(reg);
2429 /* Otherwise the best we can do is push lower 32bit known and
2430 * unknown bits into register (var_off set from jmp logic)
2431 * then learn as much as possible from the 64-bit tnum
2432 * known and unknown bits. The previous smin/smax bounds are
2433 * invalid here because of jmp32 compare so mark them unknown
2434 * so they do not impact tnum bounds calculation.
2436 __mark_reg64_unbounded(reg);
2438 reg_bounds_sync(reg);
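/* Illustrative (hypothetical) instruction sequence for the special case
 * handled above, where the upper 32 bits are known to be zero:
 *
 *   BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32),
 *   BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
 *
 * After this <<32, >>32 pair (or an explicit zero-extension), the 32-bit
 * subreg bounds can be copied into the 64-bit bounds directly via
 * __reg_assign_32_into_64().
 */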
2441 static bool __reg64_bound_s32(s64 a)
2443 return a >= S32_MIN && a <= S32_MAX;
2446 static bool __reg64_bound_u32(u64 a)
2448 return a >= U32_MIN && a <= U32_MAX;
2451 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
2453 __mark_reg32_unbounded(reg);
2454 if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
2455 reg->s32_min_value = (s32)reg->smin_value;
2456 reg->s32_max_value = (s32)reg->smax_value;
2458 if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
2459 reg->u32_min_value = (u32)reg->umin_value;
2460 reg->u32_max_value = (u32)reg->umax_value;
2462 reg_bounds_sync(reg);
2465 /* Mark a register as having a completely unknown (scalar) value. */
2466 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
2467 struct bpf_reg_state *reg)
2470 * Clear type, off, and union(map_ptr, range) and
2471 * padding between 'type' and union
2473 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
2474 reg->type = SCALAR_VALUE;
2476 reg->ref_obj_id = 0;
2477 reg->var_off = tnum_unknown;
2479 reg->precise = !env->bpf_capable;
2480 __mark_reg_unbounded(reg);
2483 static void mark_reg_unknown(struct bpf_verifier_env *env,
2484 struct bpf_reg_state *regs, u32 regno)
2486 if (WARN_ON(regno >= MAX_BPF_REG)) {
2487 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
2488 /* Something bad happened, let's kill all regs except FP */
2489 for (regno = 0; regno < BPF_REG_FP; regno++)
2490 __mark_reg_not_init(env, regs + regno);
2493 __mark_reg_unknown(env, regs + regno);
2496 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
2497 struct bpf_reg_state *reg)
2499 __mark_reg_unknown(env, reg);
2500 reg->type = NOT_INIT;
2503 static void mark_reg_not_init(struct bpf_verifier_env *env,
2504 struct bpf_reg_state *regs, u32 regno)
2506 if (WARN_ON(regno >= MAX_BPF_REG)) {
2507 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
2508 /* Something bad happened, let's kill all regs except FP */
2509 for (regno = 0; regno < BPF_REG_FP; regno++)
2510 __mark_reg_not_init(env, regs + regno);
2513 __mark_reg_not_init(env, regs + regno);
2516 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
2517 struct bpf_reg_state *regs, u32 regno,
2518 enum bpf_reg_type reg_type,
2519 struct btf *btf, u32 btf_id,
2520 enum bpf_type_flag flag)
2522 if (reg_type == SCALAR_VALUE) {
2523 mark_reg_unknown(env, regs, regno);
2526 mark_reg_known_zero(env, regs, regno);
2527 regs[regno].type = PTR_TO_BTF_ID | flag;
2528 regs[regno].btf = btf;
2529 regs[regno].btf_id = btf_id;
2532 #define DEF_NOT_SUBREG (0)
2533 static void init_reg_state(struct bpf_verifier_env *env,
2534 struct bpf_func_state *state)
2536 struct bpf_reg_state *regs = state->regs;
2539 for (i = 0; i < MAX_BPF_REG; i++) {
2540 mark_reg_not_init(env, regs, i);
2541 regs[i].live = REG_LIVE_NONE;
2542 regs[i].parent = NULL;
2543 regs[i].subreg_def = DEF_NOT_SUBREG;
2547 regs[BPF_REG_FP].type = PTR_TO_STACK;
2548 mark_reg_known_zero(env, regs, BPF_REG_FP);
2549 regs[BPF_REG_FP].frameno = state->frameno;
2552 #define BPF_MAIN_FUNC (-1)
2553 static void init_func_state(struct bpf_verifier_env *env,
2554 struct bpf_func_state *state,
2555 int callsite, int frameno, int subprogno)
2557 state->callsite = callsite;
2558 state->frameno = frameno;
2559 state->subprogno = subprogno;
2560 state->callback_ret_range = tnum_range(0, 0);
2561 init_reg_state(env, state);
2562 mark_verifier_state_scratched(env);
2565 /* Similar to push_stack(), but for async callbacks */
2566 static struct bpf_verifier_state *push_async_cb(struct bpf_verifier_env *env,
2567 int insn_idx, int prev_insn_idx,
2570 struct bpf_verifier_stack_elem *elem;
2571 struct bpf_func_state *frame;
2573 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
2577 elem->insn_idx = insn_idx;
2578 elem->prev_insn_idx = prev_insn_idx;
2579 elem->next = env->head;
2580 elem->log_pos = env->log.end_pos;
2583 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
2585 "The sequence of %d jumps is too complex for async cb.\n",
2589 /* Unlike push_stack() do not copy_verifier_state().
2590 * The caller state doesn't matter.
2591 * This is async callback. It starts in a fresh stack.
2592 * Initialize it similar to do_check_common().
2594 elem->st.branches = 1;
2595 frame = kzalloc(sizeof(*frame), GFP_KERNEL);
2598 init_func_state(env, frame,
2599 BPF_MAIN_FUNC /* callsite */,
2600 0 /* frameno within this callchain */,
2601 subprog /* subprog number within this prog */);
2602 elem->st.frame[0] = frame;
2605 free_verifier_state(env->cur_state, true);
2606 env->cur_state = NULL;
2607 /* pop all elements and return */
2608 while (!pop_stack(env, NULL, NULL, false));
2614 SRC_OP, /* register is used as source operand */
2615 DST_OP, /* register is used as destination operand */
2616 DST_OP_NO_MARK /* same as above, check only, don't mark */
2619 static int cmp_subprogs(const void *a, const void *b)
2621 return ((struct bpf_subprog_info *)a)->start -
2622 ((struct bpf_subprog_info *)b)->start;
2625 static int find_subprog(struct bpf_verifier_env *env, int off)
2627 struct bpf_subprog_info *p;
2629 p = bsearch(&off, env->subprog_info, env->subprog_cnt,
2630 sizeof(env->subprog_info[0]), cmp_subprogs);
2633 return p - env->subprog_info;
2637 static int add_subprog(struct bpf_verifier_env *env, int off)
2639 int insn_cnt = env->prog->len;
2642 if (off >= insn_cnt || off < 0) {
2643 verbose(env, "call to invalid destination\n");
2646 ret = find_subprog(env, off);
2649 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
2650 verbose(env, "too many subprograms\n");
2653 /* determine subprog starts. The end is one before the next starts */
2654 env->subprog_info[env->subprog_cnt++].start = off;
2655 sort(env->subprog_info, env->subprog_cnt,
2656 sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
2657 return env->subprog_cnt - 1;
2660 #define MAX_KFUNC_DESCS 256
2661 #define MAX_KFUNC_BTFS 256
2663 struct bpf_kfunc_desc {
2664 struct btf_func_model func_model;
2671 struct bpf_kfunc_btf {
2673 struct module *module;
2677 struct bpf_kfunc_desc_tab {
2678 /* Sorted by func_id (BTF ID) and offset (fd_array offset) during
2679 * verification. JITs do lookups by bpf_insn, where func_id may not be
2680 * available, therefore at the end of verification do_misc_fixups()
2681 * sorts this by imm and offset.
2683 struct bpf_kfunc_desc descs[MAX_KFUNC_DESCS];
2687 struct bpf_kfunc_btf_tab {
2688 struct bpf_kfunc_btf descs[MAX_KFUNC_BTFS];
2692 static int kfunc_desc_cmp_by_id_off(const void *a, const void *b)
2694 const struct bpf_kfunc_desc *d0 = a;
2695 const struct bpf_kfunc_desc *d1 = b;
2697 /* func_id is not greater than BTF_MAX_TYPE */
2698 return d0->func_id - d1->func_id ?: d0->offset - d1->offset;
2701 static int kfunc_btf_cmp_by_off(const void *a, const void *b)
2703 const struct bpf_kfunc_btf *d0 = a;
2704 const struct bpf_kfunc_btf *d1 = b;
2706 return d0->offset - d1->offset;
2709 static const struct bpf_kfunc_desc *
2710 find_kfunc_desc(const struct bpf_prog *prog, u32 func_id, u16 offset)
2712 struct bpf_kfunc_desc desc = {
2716 struct bpf_kfunc_desc_tab *tab;
2718 tab = prog->aux->kfunc_tab;
2719 return bsearch(&desc, tab->descs, tab->nr_descs,
2720 sizeof(tab->descs[0]), kfunc_desc_cmp_by_id_off);
2723 int bpf_get_kfunc_addr(const struct bpf_prog *prog, u32 func_id,
2724 u16 btf_fd_idx, u8 **func_addr)
2726 const struct bpf_kfunc_desc *desc;
2728 desc = find_kfunc_desc(prog, func_id, btf_fd_idx);
2732 *func_addr = (u8 *)desc->addr;
2736 static struct btf *__find_kfunc_desc_btf(struct bpf_verifier_env *env,
2739 struct bpf_kfunc_btf kf_btf = { .offset = offset };
2740 struct bpf_kfunc_btf_tab *tab;
2741 struct bpf_kfunc_btf *b;
2746 tab = env->prog->aux->kfunc_btf_tab;
2747 b = bsearch(&kf_btf, tab->descs, tab->nr_descs,
2748 sizeof(tab->descs[0]), kfunc_btf_cmp_by_off);
2750 if (tab->nr_descs == MAX_KFUNC_BTFS) {
2751 verbose(env, "too many different module BTFs\n");
2752 return ERR_PTR(-E2BIG);
2755 if (bpfptr_is_null(env->fd_array)) {
2756 verbose(env, "kfunc offset > 0 without fd_array is invalid\n");
2757 return ERR_PTR(-EPROTO);
2760 if (copy_from_bpfptr_offset(&btf_fd, env->fd_array,
2761 offset * sizeof(btf_fd),
2763 return ERR_PTR(-EFAULT);
2765 btf = btf_get_by_fd(btf_fd);
2767 verbose(env, "invalid module BTF fd specified\n");
2771 if (!btf_is_module(btf)) {
2772 verbose(env, "BTF fd for kfunc is not a module BTF\n");
2774 return ERR_PTR(-EINVAL);
2777 mod = btf_try_get_module(btf);
2780 return ERR_PTR(-ENXIO);
2783 b = &tab->descs[tab->nr_descs++];
2788 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2789 kfunc_btf_cmp_by_off, NULL);
2794 void bpf_free_kfunc_btf_tab(struct bpf_kfunc_btf_tab *tab)
2799 while (tab->nr_descs--) {
2800 module_put(tab->descs[tab->nr_descs].module);
2801 btf_put(tab->descs[tab->nr_descs].btf);
2806 static struct btf *find_kfunc_desc_btf(struct bpf_verifier_env *env, s16 offset)
2810 /* In the future, a negative offset could be allowed in order to extend
2811 * the range of the fd index into fd_array, interpreted as u16.
2813 verbose(env, "negative offset disallowed for kernel module function call\n");
2814 return ERR_PTR(-EINVAL);
2817 return __find_kfunc_desc_btf(env, offset);
2819 return btf_vmlinux ?: ERR_PTR(-ENOENT);
2822 static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
2824 const struct btf_type *func, *func_proto;
2825 struct bpf_kfunc_btf_tab *btf_tab;
2826 struct bpf_kfunc_desc_tab *tab;
2827 struct bpf_prog_aux *prog_aux;
2828 struct bpf_kfunc_desc *desc;
2829 const char *func_name;
2830 struct btf *desc_btf;
2831 unsigned long call_imm;
2835 prog_aux = env->prog->aux;
2836 tab = prog_aux->kfunc_tab;
2837 btf_tab = prog_aux->kfunc_btf_tab;
2840 verbose(env, "calling kernel function is not supported without CONFIG_DEBUG_INFO_BTF\n");
2844 if (!env->prog->jit_requested) {
2845 verbose(env, "JIT is required for calling kernel function\n");
2849 if (!bpf_jit_supports_kfunc_call()) {
2850 verbose(env, "JIT does not support calling kernel function\n");
2854 if (!env->prog->gpl_compatible) {
2855 verbose(env, "cannot call kernel function from non-GPL compatible program\n");
2859 tab = kzalloc(sizeof(*tab), GFP_KERNEL);
2862 prog_aux->kfunc_tab = tab;
2865 /* func_id == 0 is always invalid, but instead of returning an error, be
2866 * conservative and wait until the code elimination pass before returning
2867 * error, so that invalid calls that get pruned out can be in BPF programs
2868 * loaded from userspace. It is also required that offset be untouched
2871 if (!func_id && !offset)
2874 if (!btf_tab && offset) {
2875 btf_tab = kzalloc(sizeof(*btf_tab), GFP_KERNEL);
2878 prog_aux->kfunc_btf_tab = btf_tab;
2881 desc_btf = find_kfunc_desc_btf(env, offset);
2882 if (IS_ERR(desc_btf)) {
2883 verbose(env, "failed to find BTF for kernel function\n");
2884 return PTR_ERR(desc_btf);
2887 if (find_kfunc_desc(env->prog, func_id, offset))
2890 if (tab->nr_descs == MAX_KFUNC_DESCS) {
2891 verbose(env, "too many different kernel function calls\n");
2895 func = btf_type_by_id(desc_btf, func_id);
2896 if (!func || !btf_type_is_func(func)) {
2897 verbose(env, "kernel btf_id %u is not a function\n",
2901 func_proto = btf_type_by_id(desc_btf, func->type);
2902 if (!func_proto || !btf_type_is_func_proto(func_proto)) {
2903 verbose(env, "kernel function btf_id %u does not have a valid func_proto\n",
2908 func_name = btf_name_by_offset(desc_btf, func->name_off);
2909 addr = kallsyms_lookup_name(func_name);
2911 verbose(env, "cannot find address for kernel function %s\n",
2915 specialize_kfunc(env, func_id, offset, &addr);
2917 if (bpf_jit_supports_far_kfunc_call()) {
2920 call_imm = BPF_CALL_IMM(addr);
2921 /* Check whether the relative offset overflows desc->imm */
2922 if ((unsigned long)(s32)call_imm != call_imm) {
2923 verbose(env, "address of kernel function %s is out of range\n",
2929 if (bpf_dev_bound_kfunc_id(func_id)) {
2930 err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
2935 desc = &tab->descs[tab->nr_descs++];
2936 desc->func_id = func_id;
2937 desc->imm = call_imm;
2938 desc->offset = offset;
2940 err = btf_distill_func_proto(&env->log, desc_btf,
2941 func_proto, func_name,
2944 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2945 kfunc_desc_cmp_by_id_off, NULL);
2949 static int kfunc_desc_cmp_by_imm_off(const void *a, const void *b)
2951 const struct bpf_kfunc_desc *d0 = a;
2952 const struct bpf_kfunc_desc *d1 = b;
2954 if (d0->imm != d1->imm)
2955 return d0->imm < d1->imm ? -1 : 1;
2956 if (d0->offset != d1->offset)
2957 return d0->offset < d1->offset ? -1 : 1;
2961 static void sort_kfunc_descs_by_imm_off(struct bpf_prog *prog)
2963 struct bpf_kfunc_desc_tab *tab;
2965 tab = prog->aux->kfunc_tab;
2969 sort(tab->descs, tab->nr_descs, sizeof(tab->descs[0]),
2970 kfunc_desc_cmp_by_imm_off, NULL);
2973 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog)
2975 return !!prog->aux->kfunc_tab;
2978 const struct btf_func_model *
2979 bpf_jit_find_kfunc_model(const struct bpf_prog *prog,
2980 const struct bpf_insn *insn)
2982 const struct bpf_kfunc_desc desc = {
2984 .offset = insn->off,
2986 const struct bpf_kfunc_desc *res;
2987 struct bpf_kfunc_desc_tab *tab;
2989 tab = prog->aux->kfunc_tab;
2990 res = bsearch(&desc, tab->descs, tab->nr_descs,
2991 sizeof(tab->descs[0]), kfunc_desc_cmp_by_imm_off);
2993 return res ? &res->func_model : NULL;
2996 static int add_subprog_and_kfunc(struct bpf_verifier_env *env)
2998 struct bpf_subprog_info *subprog = env->subprog_info;
2999 struct bpf_insn *insn = env->prog->insnsi;
3000 int i, ret, insn_cnt = env->prog->len;
3002 /* Add entry function. */
3003 ret = add_subprog(env, 0);
3007 for (i = 0; i < insn_cnt; i++, insn++) {
3008 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn) &&
3009 !bpf_pseudo_kfunc_call(insn))
3012 if (!env->bpf_capable) {
3013 verbose(env, "loading/calling other bpf or kernel functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
3017 if (bpf_pseudo_func(insn) || bpf_pseudo_call(insn))
3018 ret = add_subprog(env, i + insn->imm + 1);
3020 ret = add_kfunc_call(env, insn->imm, insn->off);
3026 /* Add a fake 'exit' subprog which could simplify subprog iteration
3027 * logic. 'subprog_cnt' should not be increased.
3029 subprog[env->subprog_cnt].start = insn_cnt;
3031 if (env->log.level & BPF_LOG_LEVEL2)
3032 for (i = 0; i < env->subprog_cnt; i++)
3033 verbose(env, "func#%d @%d\n", i, subprog[i].start);
3038 static int check_subprogs(struct bpf_verifier_env *env)
3040 int i, subprog_start, subprog_end, off, cur_subprog = 0;
3041 struct bpf_subprog_info *subprog = env->subprog_info;
3042 struct bpf_insn *insn = env->prog->insnsi;
3043 int insn_cnt = env->prog->len;
3045 /* now check that all jumps are within the same subprog */
3046 subprog_start = subprog[cur_subprog].start;
3047 subprog_end = subprog[cur_subprog + 1].start;
3048 for (i = 0; i < insn_cnt; i++) {
3049 u8 code = insn[i].code;
3051 if (code == (BPF_JMP | BPF_CALL) &&
3052 insn[i].src_reg == 0 &&
3053 insn[i].imm == BPF_FUNC_tail_call)
3054 subprog[cur_subprog].has_tail_call = true;
3055 if (BPF_CLASS(code) == BPF_LD &&
3056 (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
3057 subprog[cur_subprog].has_ld_abs = true;
3058 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
3060 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
3062 if (code == (BPF_JMP32 | BPF_JA))
3063 off = i + insn[i].imm + 1;
3065 off = i + insn[i].off + 1;
3066 if (off < subprog_start || off >= subprog_end) {
3067 verbose(env, "jump out of range from insn %d to %d\n", i, off);
3071 if (i == subprog_end - 1) {
3072 /* to avoid fall-through from one subprog into another
3073 * the last insn of the subprog should be either exit
3074 * or unconditional jump back
3076 if (code != (BPF_JMP | BPF_EXIT) &&
3077 code != (BPF_JMP32 | BPF_JA) &&
3078 code != (BPF_JMP | BPF_JA)) {
3079 verbose(env, "last insn is not an exit or jmp\n");
3082 subprog_start = subprog_end;
3084 if (cur_subprog < env->subprog_cnt)
3085 subprog_end = subprog[cur_subprog + 1].start;
3091 /* Parentage chain of this register (or stack slot) should take care of all
3092 * issues like callee-saved registers, stack slot allocation time, etc.
3094 static int mark_reg_read(struct bpf_verifier_env *env,
3095 const struct bpf_reg_state *state,
3096 struct bpf_reg_state *parent, u8 flag)
3098 bool writes = parent == state->parent; /* Observe write marks */
3102 /* if read wasn't screened by an earlier write ... */
3103 if (writes && state->live & REG_LIVE_WRITTEN)
3105 if (parent->live & REG_LIVE_DONE) {
3106 verbose(env, "verifier BUG type %s var_off %lld off %d\n",
3107 reg_type_str(env, parent->type),
3108 parent->var_off.value, parent->off);
3111 /* The first condition is more likely to be true than the
3112 * second, so check it first.
3114 if ((parent->live & REG_LIVE_READ) == flag ||
3115 parent->live & REG_LIVE_READ64)
3116 /* The parentage chain never changes and
3117 * this parent was already marked as LIVE_READ.
3118 * There is no need to keep walking the chain again and
3119 * keep re-marking all parents as LIVE_READ.
3120 * This case happens when the same register is read
3121 * multiple times without writes into it in-between.
3122 * Also, if parent has the stronger REG_LIVE_READ64 set,
3123 * then no need to set the weak REG_LIVE_READ32.
3126 /* ... then we depend on parent's value */
3127 parent->live |= flag;
3128 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
3129 if (flag == REG_LIVE_READ64)
3130 parent->live &= ~REG_LIVE_READ32;
3132 parent = state->parent;
3137 if (env->longest_mark_read_walk < cnt)
3138 env->longest_mark_read_walk = cnt;
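/* Illustrative (hypothetical) scenario for the parentage walk above: if r6 is
 * written in a checkpointed parent state, e.g.
 *
 *   BPF_MOV64_IMM(BPF_REG_6, 1),
 *
 * and a later child state reads r6 without an intervening write, e.g.
 *
 *   BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
 *
 * then mark_reg_read() walks the child's parent chain and sets
 * REG_LIVE_READ64/READ32 on the parent's r6, so state pruning knows that
 * r6's contents in the parent are still relevant.
 */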
3142 static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
3144 struct bpf_func_state *state = func(env, reg);
3147 /* For CONST_PTR_TO_DYNPTR, it must have already been done by
3148 * check_reg_arg in check_helper_call and mark_btf_func_reg_size in
3151 if (reg->type == CONST_PTR_TO_DYNPTR)
3153 spi = dynptr_get_spi(env, reg);
3156 /* Caller ensures dynptr is valid and initialized, which means spi is in
3157 * bounds and spi is the first dynptr slot. Simply mark stack slot as
3160 ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
3161 state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
3164 return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
3165 state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
3168 static int mark_iter_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
3169 int spi, int nr_slots)
3171 struct bpf_func_state *state = func(env, reg);
3174 for (i = 0; i < nr_slots; i++) {
3175 struct bpf_reg_state *st = &state->stack[spi - i].spilled_ptr;
3177 err = mark_reg_read(env, st, st->parent, REG_LIVE_READ64);
3181 mark_stack_slot_scratched(env, spi - i);
3187 /* This function is supposed to be used by the following 32-bit optimization
3188 * code only. It returns TRUE if the source or destination register operates
3189 * on 64-bit, otherwise it returns FALSE.
3191 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
3192 u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
3197 class = BPF_CLASS(code);
3199 if (class == BPF_JMP) {
3200 /* BPF_EXIT for "main" will reach here. Return TRUE
3205 if (op == BPF_CALL) {
3206 /* BPF to BPF call will reach here because of marking
3207 * caller saved clobber with DST_OP_NO_MARK, for which we
3208 * don't care about the register def because they are
3209 * marked as NOT_INIT already anyway.
3211 if (insn->src_reg == BPF_PSEUDO_CALL)
3213 /* Helper call will reach here because of arg type
3214 * check, conservatively return TRUE.
3223 if (class == BPF_ALU64 && op == BPF_END && (insn->imm == 16 || insn->imm == 32))
3226 if (class == BPF_ALU64 || class == BPF_JMP ||
3227 (class == BPF_ALU && op == BPF_END && insn->imm == 64))
3230 if (class == BPF_ALU || class == BPF_JMP32)
3233 if (class == BPF_LDX) {
3235 return BPF_SIZE(code) == BPF_DW;
3236 /* LDX source must be ptr. */
3240 if (class == BPF_STX) {
3241 /* BPF_STX (including atomic variants) has multiple source
3242 * operands, one of which is a ptr. Check whether the caller is
3245 if (t == SRC_OP && reg->type != SCALAR_VALUE)
3247 return BPF_SIZE(code) == BPF_DW;
3250 if (class == BPF_LD) {
3251 u8 mode = BPF_MODE(code);
3254 if (mode == BPF_IMM)
3257 /* Both LD_IND and LD_ABS return 32-bit data. */
3261 /* Implicit ctx ptr. */
3262 if (regno == BPF_REG_6)
3265 /* Explicit source could be any width. */
3269 if (class == BPF_ST)
3270 /* The only source register for BPF_ST is a ptr. */
3273 /* Conservatively return true at default. */
3277 /* Return the regno defined by the insn, or -1. */
3278 static int insn_def_regno(const struct bpf_insn *insn)
3280 switch (BPF_CLASS(insn->code)) {
3286 if (BPF_MODE(insn->code) == BPF_ATOMIC &&
3287 (insn->imm & BPF_FETCH)) {
3288 if (insn->imm == BPF_CMPXCHG)
3291 return insn->src_reg;
3296 return insn->dst_reg;
3300 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
3301 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
3303 int dst_reg = insn_def_regno(insn);
3308 return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
3311 static void mark_insn_zext(struct bpf_verifier_env *env,
3312 struct bpf_reg_state *reg)
3314 s32 def_idx = reg->subreg_def;
3316 if (def_idx == DEF_NOT_SUBREG)
3319 env->insn_aux_data[def_idx - 1].zext_dst = true;
3320 /* The dst will be zero extended, so won't be sub-register anymore. */
3321 reg->subreg_def = DEF_NOT_SUBREG;
3324 static int __check_reg_arg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
3325 enum reg_arg_type t)
3327 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
3328 struct bpf_reg_state *reg;
3331 if (regno >= MAX_BPF_REG) {
3332 verbose(env, "R%d is invalid\n", regno);
3336 mark_reg_scratched(env, regno);
3339 rw64 = is_reg64(env, insn, regno, reg, t);
3341 /* check whether register used as source operand can be read */
3342 if (reg->type == NOT_INIT) {
3343 verbose(env, "R%d !read_ok\n", regno);
3346 /* We don't need to worry about FP liveness because it's read-only */
3347 if (regno == BPF_REG_FP)
3351 mark_insn_zext(env, reg);
3353 return mark_reg_read(env, reg, reg->parent,
3354 rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
3356 /* check whether register used as dest operand can be written to */
3357 if (regno == BPF_REG_FP) {
3358 verbose(env, "frame pointer is read only\n");
3361 reg->live |= REG_LIVE_WRITTEN;
3362 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
3364 mark_reg_unknown(env, regs, regno);
3369 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
3370 enum reg_arg_type t)
3372 struct bpf_verifier_state *vstate = env->cur_state;
3373 struct bpf_func_state *state = vstate->frame[vstate->curframe];
3375 return __check_reg_arg(env, state->regs, regno, t);
3378 static void mark_jmp_point(struct bpf_verifier_env *env, int idx)
3380 env->insn_aux_data[idx].jmp_point = true;
3383 static bool is_jmp_point(struct bpf_verifier_env *env, int insn_idx)
3385 return env->insn_aux_data[insn_idx].jmp_point;
3388 /* for any branch, call, exit record the history of jmps in the given state */
3389 static int push_jmp_history(struct bpf_verifier_env *env,
3390 struct bpf_verifier_state *cur)
3392 u32 cnt = cur->jmp_history_cnt;
3393 struct bpf_idx_pair *p;
3396 if (!is_jmp_point(env, env->insn_idx))
3400 alloc_size = kmalloc_size_roundup(size_mul(cnt, sizeof(*p)));
3401 p = krealloc(cur->jmp_history, alloc_size, GFP_USER);
3404 p[cnt - 1].idx = env->insn_idx;
3405 p[cnt - 1].prev_idx = env->prev_insn_idx;
3406 cur->jmp_history = p;
3407 cur->jmp_history_cnt = cnt;
3411 /* Backtrack one insn at a time. If idx is not at the top of recorded
3412 * history then previous instruction came from straight line execution.
3413 * Return -ENOENT if we exhausted all instructions within given state.
3415 * It's legal to have a bit of looping with the same starting and ending
3416 * insn index within the same state, e.g.: 3->4->5->3, so just because current
3417 * instruction index is the same as state's first_idx doesn't mean we are
3418 * done. If there is still some jump history left, we should keep going. We
3419 * need to take into account that we might have a jump history between given
3420 * state's parent and itself, due to checkpointing. In this case, we'll have
3421 * history entry recording a jump from last instruction of parent state and
3422 * first instruction of given state.
3424 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
3429 if (i == st->first_insn_idx) {
3432 if (cnt == 1 && st->jmp_history[0].idx == i)
3436 if (cnt && st->jmp_history[cnt - 1].idx == i) {
3437 i = st->jmp_history[cnt - 1].prev_idx;
3445 static const char *disasm_kfunc_name(void *data, const struct bpf_insn *insn)
3447 const struct btf_type *func;
3448 struct btf *desc_btf;
3450 if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL)
3453 desc_btf = find_kfunc_desc_btf(data, insn->off);
3454 if (IS_ERR(desc_btf))
3457 func = btf_type_by_id(desc_btf, insn->imm);
3458 return btf_name_by_offset(desc_btf, func->name_off);
3461 static inline void bt_init(struct backtrack_state *bt, u32 frame)
3466 static inline void bt_reset(struct backtrack_state *bt)
3468 struct bpf_verifier_env *env = bt->env;
3470 memset(bt, 0, sizeof(*bt));
3474 static inline u32 bt_empty(struct backtrack_state *bt)
3479 for (i = 0; i <= bt->frame; i++)
3480 mask |= bt->reg_masks[i] | bt->stack_masks[i];
3485 static inline int bt_subprog_enter(struct backtrack_state *bt)
3487 if (bt->frame == MAX_CALL_FRAMES - 1) {
3488 verbose(bt->env, "BUG subprog enter from frame %d\n", bt->frame);
3489 WARN_ONCE(1, "verifier backtracking bug");
3496 static inline int bt_subprog_exit(struct backtrack_state *bt)
3498 if (bt->frame == 0) {
3499 verbose(bt->env, "BUG subprog exit from frame 0\n");
3500 WARN_ONCE(1, "verifier backtracking bug");
3507 static inline void bt_set_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3509 bt->reg_masks[frame] |= 1 << reg;
3512 static inline void bt_clear_frame_reg(struct backtrack_state *bt, u32 frame, u32 reg)
3514 bt->reg_masks[frame] &= ~(1 << reg);
3517 static inline void bt_set_reg(struct backtrack_state *bt, u32 reg)
3519 bt_set_frame_reg(bt, bt->frame, reg);
3522 static inline void bt_clear_reg(struct backtrack_state *bt, u32 reg)
3524 bt_clear_frame_reg(bt, bt->frame, reg);
3527 static inline void bt_set_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3529 bt->stack_masks[frame] |= 1ull << slot;
3532 static inline void bt_clear_frame_slot(struct backtrack_state *bt, u32 frame, u32 slot)
3534 bt->stack_masks[frame] &= ~(1ull << slot);
3537 static inline void bt_set_slot(struct backtrack_state *bt, u32 slot)
3539 bt_set_frame_slot(bt, bt->frame, slot);
3542 static inline void bt_clear_slot(struct backtrack_state *bt, u32 slot)
3544 bt_clear_frame_slot(bt, bt->frame, slot);
3547 static inline u32 bt_frame_reg_mask(struct backtrack_state *bt, u32 frame)
3549 return bt->reg_masks[frame];
3552 static inline u32 bt_reg_mask(struct backtrack_state *bt)
3554 return bt->reg_masks[bt->frame];
3557 static inline u64 bt_frame_stack_mask(struct backtrack_state *bt, u32 frame)
3559 return bt->stack_masks[frame];
3562 static inline u64 bt_stack_mask(struct backtrack_state *bt)
3564 return bt->stack_masks[bt->frame];
3567 static inline bool bt_is_reg_set(struct backtrack_state *bt, u32 reg)
3569 return bt->reg_masks[bt->frame] & (1 << reg);
3572 static inline bool bt_is_slot_set(struct backtrack_state *bt, u32 slot)
3574 return bt->stack_masks[bt->frame] & (1ull << slot);
3577 /* format registers bitmask, e.g., "r0,r2,r4" for 0x15 mask */
3578 static void fmt_reg_mask(char *buf, ssize_t buf_sz, u32 reg_mask)
3580 DECLARE_BITMAP(mask, 64);
3586 bitmap_from_u64(mask, reg_mask);
3587 for_each_set_bit(i, mask, 32) {
3588 n = snprintf(buf, buf_sz, "%sr%d", first ? "" : ",", i);
3596 /* format stack slots bitmask, e.g., "-8,-24,-40" for 0x15 mask */
3597 static void fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask)
3599 DECLARE_BITMAP(mask, 64);
3605 bitmap_from_u64(mask, stack_mask);
3606 for_each_set_bit(i, mask, 64) {
3607 n = snprintf(buf, buf_sz, "%s%d", first ? "" : ",", -(i + 1) * 8);
3616 /* For given verifier state backtrack_insn() is called from the last insn to
3617 * the first insn. Its purpose is to compute a bitmask of registers and
3618 * stack slots that need precision in the parent verifier state.
3620 * @idx is an index of the instruction we are currently processing;
3621 * @subseq_idx is an index of the subsequent instruction that:
3622 * - *would be* executed next, if jump history is viewed in forward order;
3623 * - *was* processed previously during backtracking.
3625 static int backtrack_insn(struct bpf_verifier_env *env, int idx, int subseq_idx,
3626 struct backtrack_state *bt)
3628 const struct bpf_insn_cbs cbs = {
3629 .cb_call = disasm_kfunc_name,
3630 .cb_print = verbose,
3631 .private_data = env,
3633 struct bpf_insn *insn = env->prog->insnsi + idx;
3634 u8 class = BPF_CLASS(insn->code);
3635 u8 opcode = BPF_OP(insn->code);
3636 u8 mode = BPF_MODE(insn->code);
3637 u32 dreg = insn->dst_reg;
3638 u32 sreg = insn->src_reg;
3641 if (insn->code == 0)
3643 if (env->log.level & BPF_LOG_LEVEL2) {
3644 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_reg_mask(bt));
3645 verbose(env, "mark_precise: frame%d: regs=%s ",
3646 bt->frame, env->tmp_str_buf);
3647 fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN, bt_stack_mask(bt));
3648 verbose(env, "stack=%s before ", env->tmp_str_buf);
3649 verbose(env, "%d: ", idx);
3650 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
3653 if (class == BPF_ALU || class == BPF_ALU64) {
3654 if (!bt_is_reg_set(bt, dreg))
3656 if (opcode == BPF_END || opcode == BPF_NEG) {
3657 /* sreg is reserved and unused
3658 * dreg still needs precision before this insn
3661 } else if (opcode == BPF_MOV) {
3662 if (BPF_SRC(insn->code) == BPF_X) {
3663 /* dreg = sreg or dreg = (s8, s16, s32)sreg
3664 * dreg needs precision after this insn
3665 * sreg needs precision before this insn
3667 bt_clear_reg(bt, dreg);
3668 bt_set_reg(bt, sreg);
3671 * dreg needs precision after this insn.
3672 * Corresponding register is already marked
3673 * as precise=true in this verifier state.
3674 * No further markings in parent are necessary
3676 bt_clear_reg(bt, dreg);
3679 if (BPF_SRC(insn->code) == BPF_X) {
3681 * both dreg and sreg need precision
3684 bt_set_reg(bt, sreg);
3686 * dreg still needs precision before this insn
3689 } else if (class == BPF_LDX) {
3690 if (!bt_is_reg_set(bt, dreg))
3692 bt_clear_reg(bt, dreg);
3694 /* scalars can only be spilled into stack w/o losing precision.
3695 * Load from any other memory can be zero extended.
3696 * The desire to keep that precision is already indicated
3697 * by 'precise' mark in corresponding register of this state.
3698 * No further tracking necessary.
3700 if (insn->src_reg != BPF_REG_FP)
3703 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
3704 * That [fp - off] slot contains a scalar that needs to be
3705 * tracked with precision
3707 spi = (-insn->off - 1) / BPF_REG_SIZE;
3709 verbose(env, "BUG spi %d\n", spi);
3710 WARN_ONCE(1, "verifier backtracking bug");
3713 bt_set_slot(bt, spi);
3714 } else if (class == BPF_STX || class == BPF_ST) {
3715 if (bt_is_reg_set(bt, dreg))
3716 /* stx & st shouldn't be using _scalar_ dst_reg
3717 * to access memory. It means backtracking
3718 * encountered a case of pointer subtraction.
3721 /* scalars can only be spilled into stack */
3722 if (insn->dst_reg != BPF_REG_FP)
3724 spi = (-insn->off - 1) / BPF_REG_SIZE;
3726 verbose(env, "BUG spi %d\n", spi);
3727 WARN_ONCE(1, "verifier backtracking bug");
3730 if (!bt_is_slot_set(bt, spi))
3732 bt_clear_slot(bt, spi);
3733 if (class == BPF_STX)
3734 bt_set_reg(bt, sreg);
3735 } else if (class == BPF_JMP || class == BPF_JMP32) {
3736 if (bpf_pseudo_call(insn)) {
3737 int subprog_insn_idx, subprog;
3739 subprog_insn_idx = idx + insn->imm + 1;
3740 subprog = find_subprog(env, subprog_insn_idx);
3744 if (subprog_is_global(env, subprog)) {
3745 /* check that jump history doesn't have any
3746 * extra instructions from subprog; the next
3747 * instruction after call to global subprog
3748 * should be literally next instruction in
3751 WARN_ONCE(idx + 1 != subseq_idx, "verifier backtracking bug");
3752 /* r1-r5 are invalidated after subprog call,
3753 * so for global func call it shouldn't be set
3756 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3757 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3758 WARN_ONCE(1, "verifier backtracking bug");
3761 /* global subprog always sets R0 */
3762 bt_clear_reg(bt, BPF_REG_0);
3765 /* static subprog call instruction, which
3766 * means that we are exiting current subprog,
3767 * so only r1-r5 could be still requested as
3768 * precise, r0 and r6-r10 or any stack slot in
3769 * the current frame should be zero by now
3771 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3772 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3773 WARN_ONCE(1, "verifier backtracking bug");
3776 /* we don't track register spills perfectly,
3777 * so fall back to force-precise instead of failing */
3778 if (bt_stack_mask(bt) != 0)
3780 /* propagate r1-r5 to the caller */
3781 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
3782 if (bt_is_reg_set(bt, i)) {
3783 bt_clear_reg(bt, i);
3784 bt_set_frame_reg(bt, bt->frame - 1, i);
3787 if (bt_subprog_exit(bt))
3791 } else if ((bpf_helper_call(insn) &&
3792 is_callback_calling_function(insn->imm) &&
3793 !is_async_callback_calling_function(insn->imm)) ||
3794 (bpf_pseudo_kfunc_call(insn) && is_callback_calling_kfunc(insn->imm))) {
3795 /* callback-calling helper or kfunc call, which means
3796 * we are exiting from subprog, but unlike the subprog
3797 * call handling above, we shouldn't propagate
3798 * precision of r1-r5 (if any requested), as they are
3799 * not actually arguments passed directly to callback
3802 if (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) {
3803 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3804 WARN_ONCE(1, "verifier backtracking bug");
3807 if (bt_stack_mask(bt) != 0)
3809 /* clear r1-r5 in callback subprog's mask */
3810 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
3811 bt_clear_reg(bt, i);
3812 if (bt_subprog_exit(bt))
3815 } else if (opcode == BPF_CALL) {
3816 /* kfunc with imm==0 is invalid and fixup_kfunc_call will
3817 * catch this error later. Make backtracking conservative
3820 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL && insn->imm == 0)
3822 /* regular helper call sets R0 */
3823 bt_clear_reg(bt, BPF_REG_0);
3824 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3825 /* if backtracing was looking for registers R1-R5
3826 * they should have been found already.
3828 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3829 WARN_ONCE(1, "verifier backtracking bug");
3832 } else if (opcode == BPF_EXIT) {
3835 if (bt_reg_mask(bt) & BPF_REGMASK_ARGS) {
3836 /* if backtracing was looking for registers R1-R5
3837 * they should have been found already.
3839 verbose(env, "BUG regs %x\n", bt_reg_mask(bt));
3840 WARN_ONCE(1, "verifier backtracking bug");
3844 /* BPF_EXIT in subprog or callback always returns
3845 * right after the call instruction, so by checking
3846 * whether the instruction at subseq_idx-1 is subprog
3847 * call or not we can distinguish actual exit from
3848 * *subprog* from exit from *callback*. In the former
3849 * case, we need to propagate r0 precision, if
3850 * necessary. In the latter case we never do that.
3852 r0_precise = subseq_idx - 1 >= 0 &&
3853 bpf_pseudo_call(&env->prog->insnsi[subseq_idx - 1]) &&
3854 bt_is_reg_set(bt, BPF_REG_0);
3856 bt_clear_reg(bt, BPF_REG_0);
3857 if (bt_subprog_enter(bt))
3861 bt_set_reg(bt, BPF_REG_0);
3862 /* r6-r9 and stack slots will stay set in caller frame
3863 * bitmasks until we return back from callee(s)
3866 } else if (BPF_SRC(insn->code) == BPF_X) {
3867 if (!bt_is_reg_set(bt, dreg) && !bt_is_reg_set(bt, sreg))
3870 * Both dreg and sreg need precision before
3871 * this insn. If only sreg was marked precise
3872 * before it would be equally necessary to
3873 * propagate it to dreg.
3875 bt_set_reg(bt, dreg);
3876 bt_set_reg(bt, sreg);
3877 /* else dreg <cond> K
3878 * Only dreg still needs precision before
3879 * this insn, so for the K-based conditional
3880 * there is nothing new to be marked.
3883 } else if (class == BPF_LD) {
3884 if (!bt_is_reg_set(bt, dreg))
3886 bt_clear_reg(bt, dreg);
3887 /* It's ld_imm64 or ld_abs or ld_ind.
3888 * For ld_imm64 no further tracking of precision
3889 * into parent is necessary
3891 if (mode == BPF_IND || mode == BPF_ABS)
3892 /* to be analyzed */
3898 /* the scalar precision tracking algorithm:
3899 * . at the start all registers have precise=false.
3900 * . scalar ranges are tracked as normal through alu and jmp insns.
3901 * . once precise value of the scalar register is used in:
3902 * . ptr + scalar alu
3903 * . if (scalar cond K|scalar)
3904 * . helper_call(.., scalar, ...) where ARG_CONST is expected
3905 * backtrack through the verifier states and mark all registers and
3906 * stack slots with spilled constants that these scalar registers
3907 * should be precise.
3908 * . during state pruning two registers (or spilled stack slots)
3909 * are equivalent if both are not precise.
3911 * Note the verifier cannot simply walk register parentage chain,
3912 * since many different registers and stack slots could have been
3913 * used to compute single precise scalar.
3915 * The approach of starting with precise=true for all registers and then
3916 * backtrack to mark a register as not precise when the verifier detects
3917 * that program doesn't care about specific value (e.g., when helper
3918 * takes register as ARG_ANYTHING parameter) is not safe.
3920 * It's ok to walk single parentage chain of the verifier states.
3921 * It's possible that this backtracking will go all the way till 1st insn.
3922 * All other branches will be explored for needing precision later.
3924 * The backtracking needs to deal with cases like:
3925 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
3928 * if r5 > 0x79f goto pc+7
3929 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
3932 * call bpf_perf_event_output#25
3933 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
3937 * call foo // uses callee's r6 inside to compute r0
3941 * to track above reg_mask/stack_mask needs to be independent for each frame.
3943 * Also if parent's curframe > frame where backtracking started,
3944 * the verifier needs to mark registers in both frames, otherwise callees
3945 * may incorrectly prune callers. This is similar to
3946 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
3948 * For now backtracking falls back into conservative marking.
3950 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
3951 struct bpf_verifier_state *st)
3953 struct bpf_func_state *func;
3954 struct bpf_reg_state *reg;
3957 if (env->log.level & BPF_LOG_LEVEL2) {
3958 verbose(env, "mark_precise: frame%d: falling back to forcing all scalars precise\n",
3962 /* big hammer: mark all scalars precise in this path.
3963 * pop_stack may still get !precise scalars.
3964 * We also skip current state and go straight to first parent state,
3965 * because precision markings in current non-checkpointed state are
3966 * not needed. See why in the comment in __mark_chain_precision below.
3968 for (st = st->parent; st; st = st->parent) {
3969 for (i = 0; i <= st->curframe; i++) {
3970 func = st->frame[i];
3971 for (j = 0; j < BPF_REG_FP; j++) {
3972 reg = &func->regs[j];
3973 if (reg->type != SCALAR_VALUE || reg->precise)
3975 reg->precise = true;
3976 if (env->log.level & BPF_LOG_LEVEL2) {
3977 verbose(env, "force_precise: frame%d: forcing r%d to be precise\n",
3981 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
3982 if (!is_spilled_reg(&func->stack[j]))
3984 reg = &func->stack[j].spilled_ptr;
3985 if (reg->type != SCALAR_VALUE || reg->precise)
3987 reg->precise = true;
3988 if (env->log.level & BPF_LOG_LEVEL2) {
3989 verbose(env, "force_precise: frame%d: forcing fp%d to be precise\n",
3997 static void mark_all_scalars_imprecise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
3999 struct bpf_func_state *func;
4000 struct bpf_reg_state *reg;
4003 for (i = 0; i <= st->curframe; i++) {
4004 func = st->frame[i];
4005 for (j = 0; j < BPF_REG_FP; j++) {
4006 reg = &func->regs[j];
4007 if (reg->type != SCALAR_VALUE)
4009 reg->precise = false;
4011 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
4012 if (!is_spilled_reg(&func->stack[j]))
4014 reg = &func->stack[j].spilled_ptr;
4015 if (reg->type != SCALAR_VALUE)
4017 reg->precise = false;
4022 static bool idset_contains(struct bpf_idset *s, u32 id)
4026 for (i = 0; i < s->count; ++i)
4027 if (s->ids[i] == id)
4033 static int idset_push(struct bpf_idset *s, u32 id)
4035 if (WARN_ON_ONCE(s->count >= ARRAY_SIZE(s->ids)))
4037 s->ids[s->count++] = id;
4041 static void idset_reset(struct bpf_idset *s)
4046 /* Collect a set of IDs for all registers currently marked as precise in env->bt.
4047 * Mark all registers with these IDs as precise.
4049 static int mark_precise_scalar_ids(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
4051 struct bpf_idset *precise_ids = &env->idset_scratch;
4052 struct backtrack_state *bt = &env->bt;
4053 struct bpf_func_state *func;
4054 struct bpf_reg_state *reg;
4055 DECLARE_BITMAP(mask, 64);
4058 idset_reset(precise_ids);
4060 for (fr = bt->frame; fr >= 0; fr--) {
4061 func = st->frame[fr];
4063 bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
4064 for_each_set_bit(i, mask, 32) {
4065 reg = &func->regs[i];
4066 if (!reg->id || reg->type != SCALAR_VALUE)
4068 if (idset_push(precise_ids, reg->id))
4072 bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
4073 for_each_set_bit(i, mask, 64) {
4074 if (i >= func->allocated_stack / BPF_REG_SIZE)
4076 if (!is_spilled_scalar_reg(&func->stack[i]))
4078 reg = &func->stack[i].spilled_ptr;
4081 if (idset_push(precise_ids, reg->id))
4086 for (fr = 0; fr <= st->curframe; ++fr) {
4087 func = st->frame[fr];
4089 for (i = BPF_REG_0; i < BPF_REG_10; ++i) {
4090 reg = &func->regs[i];
4093 if (!idset_contains(precise_ids, reg->id))
4095 bt_set_frame_reg(bt, fr, i);
4097 for (i = 0; i < func->allocated_stack / BPF_REG_SIZE; ++i) {
4098 if (!is_spilled_scalar_reg(&func->stack[i]))
4100 reg = &func->stack[i].spilled_ptr;
4103 if (!idset_contains(precise_ids, reg->id))
4105 bt_set_frame_slot(bt, fr, i);
4113 * __mark_chain_precision() backtracks BPF program instruction sequence and
4114 * chain of verifier states making sure that register *regno* (if regno >= 0)
4115 * and/or stack slot *spi* (if spi >= 0) are marked as precisely tracked
4116 * SCALARS, as well as any other registers and slots that contribute to
4117 * a tracked state of given registers/stack slots, depending on specific BPF
4118 * assembly instructions (see backtrack_insns() for exact instruction handling
4119 * logic). This backtracking relies on recorded jmp_history and is able to
4120 * traverse entire chain of parent states. This process ends only when all the
4121 * necessary registers/slots and their transitive dependencies are marked as
4124 * One important and subtle aspect is that precise marks *do not matter* in
4125 * the currently verified state (current state). It is important to understand
4126 * why this is the case.
4128 * First, note that current state is the state that is not yet "checkpointed",
4129 * i.e., it is not yet put into env->explored_states, and it has no children
4130 * states as well. It's ephemeral, and can end up either a) being discarded if
4131 * compatible explored state is found at some point or BPF_EXIT instruction is
4132 * reached or b) checkpointed and put into env->explored_states, branching out
4133 * into one or more children states.
4135 * In the former case, precise markings in current state are completely
4136 * ignored by state comparison code (see regsafe() for details). Only
4137 * checkpointed ("old") state precise markings are important, and if old
4138 * state's register/slot is precise, regsafe() assumes current state's
4139 * register/slot as precise and checks value ranges exactly and precisely. If
4140 * states turn out to be compatible, current state's necessary precise
4141 * markings and any required parent states' precise markings are enforced
4142 * after the fact with propagate_precision() logic. But it's
4143 * important to realize that in this case, even after marking current state
4144 * registers/slots as precise, we immediately discard current state. So what
4145 * actually matters is any of the precise markings propagated into current
4146 * state's parent states, which are always checkpointed (due to b) case above).
4147 * As such, for scenario a) it doesn't matter if current state has precise
4148 * markings set or not.
4150 * Now, for the scenario b), checkpointing and forking into child(ren)
4151 * state(s). Note that before current state gets to checkpointing step, any
4152 * processed instruction always assumes precise SCALAR register/slot
4153 * knowledge: if precise value or range is useful to prune jump branch, BPF
4154 * verifier takes this opportunity enthusiastically. Similarly, when
4155 * register's value is used to calculate offset or memory address, exact
4156 * knowledge of SCALAR range is assumed, checked, and enforced. So, similar to
4157 * what we mentioned above about state comparison ignoring precise markings
4158 * during state comparison, BPF verifier ignores and also assumes precise
4159 * markings *at will* during instruction verification process. But as verifier
4160 * assumes precision, it also propagates any precision dependencies across
4161 * parent states, which are not yet finalized, so can be further restricted
4162 * based on new knowledge gained from restrictions enforced by their children
4163 * states. This is so that once those parent states are finalized, i.e., when
4164 * they have no more active child states, state comparison logic in
4165 * is_state_visited() would enforce strict and precise SCALAR ranges, if
4166 * required for correctness.
4168 * To build a bit more intuition, note also that once a state is checkpointed,
4169 * the path we took to get to that state is not important. This is a crucial
4170 * property for state pruning. When state is checkpointed and finalized at
4171 * some instruction index, it can be correctly and safely used to "short
4172 * circuit" any *compatible* state that reaches exactly the same instruction
4173 * index. I.e., if we jumped to that instruction from a completely different
4174 * code path than the one the original finalized state was derived from, it doesn't
4175 * matter: the current state can be discarded because from that instruction
4176 * forward having a compatible state will ensure we will safely reach the
4177 * exit. States describe preconditions for further exploration, but completely
4178 * forget the history of how we got here.
4180 * This also means that even if we needed precise SCALAR range to get to
4181 * finalized state, but from that point forward *that same* SCALAR register is
4182 * never used in a precise context (i.e., its precise value is not needed for
4183 * correctness), it's correct and safe to mark such register as "imprecise"
4184 * (i.e., precise marking set to false). This is what we rely on when we do
4185 * not set precise marking in current state. If no child state requires
4186 * precision for any given SCALAR register, it's safe to dictate that it can
4187 * be imprecise. If any child state does require this register to be precise,
4188 * we'll mark it precise later retroactively during precise markings
4189 * propagation from child state to parent states.
4191 * Skipping the setting of precise marks in the current state is a mild version of
4192 * relying on the above observation. But we can utilize this property even
4193 * more aggressively by proactively forgetting any precise marking in the
4194 * current state (which we inherited from the parent state), right before we
4195 * checkpoint it and branch off into new child state. This is done by
4196 * mark_all_scalars_imprecise() to hopefully get more permissive and generic
4197 * finalized states which help in short circuiting more future states.
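/*
 * A hypothetical sketch (instruction indices and register choice are made
 * up) of the chain such backtracking walks. Suppose a scalar ends up as a
 * helper's size argument, which needs precise bounds:
 *
 *   10: r8 = 64                     // constant scalar
 *   11: r2 = r8                     // r2 now carries the same scalar
 *   ...
 *   20: call bpf_probe_read_kernel  // r2 is the size argument
 *
 * Backtracking starts at insn 20 with r2 in the register mask; the move at
 * insn 11 transfers the mark from r2 to r8, and the constant assignment at
 * insn 10 satisfies it, so the walk stops, marking r8 precise in whatever
 * checkpointed parent states it passed through on the way.
 */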
4199 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno)
4201 struct backtrack_state *bt = &env->bt;
4202 struct bpf_verifier_state *st = env->cur_state;
4203 int first_idx = st->first_insn_idx;
4204 int last_idx = env->insn_idx;
4205 int subseq_idx = -1;
4206 struct bpf_func_state *func;
4207 struct bpf_reg_state *reg;
4208 bool skip_first = true;
4211 if (!env->bpf_capable)
4214 /* set frame number from which we are starting to backtrack */
4215 bt_init(bt, env->cur_state->curframe);
4217 /* Do sanity checks against current state of register and/or stack
4218 * slot, but don't set precise flag in current state, as precision
4219 * tracking in the current state is unnecessary.
4221 func = st->frame[bt->frame];
4223 reg = &func->regs[regno];
4224 if (reg->type != SCALAR_VALUE) {
4225 WARN_ONCE(1, "backtracking misuse");
4228 bt_set_reg(bt, regno);
4235 DECLARE_BITMAP(mask, 64);
4236 u32 history = st->jmp_history_cnt;
4238 if (env->log.level & BPF_LOG_LEVEL2) {
4239 verbose(env, "mark_precise: frame%d: last_idx %d first_idx %d subseq_idx %d \n",
4240 bt->frame, last_idx, first_idx, subseq_idx);
4243 /* If some register with scalar ID is marked as precise,
4244 * make sure that all registers sharing this ID are also precise.
4245 * This is needed to estimate effect of find_equal_scalars().
4246 * Do this at the last instruction of each state, as
4247 * bpf_reg_state::id fields are valid for these instructions.
4249 * This allows tracking precision in situations like the one below:
4251 * r2 = unknown value
4255 * r1 = r2 // r1 and r2 now share the same ID
4257 * --- state #1 {r1.id = A, r2.id = A} ---
4259 * if (r2 > 10) goto exit; // find_equal_scalars() assigns range to r1
4261 * --- state #2 {r1.id = A, r2.id = A} ---
4263 * r3 += r1 // need to mark both r1 and r2
4265 if (mark_precise_scalar_ids(env, st))
4269 /* we are at the entry into subprog, which
4270 * is expected for global funcs, but only if
4271 * requested precise registers are R1-R5
4272 * (which are global func's input arguments)
4274 if (st->curframe == 0 &&
4275 st->frame[0]->subprogno > 0 &&
4276 st->frame[0]->callsite == BPF_MAIN_FUNC &&
4277 bt_stack_mask(bt) == 0 &&
4278 (bt_reg_mask(bt) & ~BPF_REGMASK_ARGS) == 0) {
4279 bitmap_from_u64(mask, bt_reg_mask(bt));
4280 for_each_set_bit(i, mask, 32) {
4281 reg = &st->frame[0]->regs[i];
4282 bt_clear_reg(bt, i);
4283 if (reg->type == SCALAR_VALUE)
4284 reg->precise = true;
4289 verbose(env, "BUG backtracking func entry subprog %d reg_mask %x stack_mask %llx\n",
4290 st->frame[0]->subprogno, bt_reg_mask(bt), bt_stack_mask(bt));
4291 WARN_ONCE(1, "verifier backtracking bug");
4295 for (i = last_idx;;) {
4300 err = backtrack_insn(env, i, subseq_idx, bt);
4302 if (err == -ENOTSUPP) {
4303 mark_all_scalars_precise(env, env->cur_state);
4310 /* Found assignment(s) into tracked register in this state.
4311 * Since this state is already marked, just return.
4312 * Nothing to be tracked further in the parent state.
4316 i = get_prev_insn_idx(st, i, &history);
4319 if (i >= env->prog->len) {
4320 /* This can happen if backtracking reached insn 0
4321 * and there are still reg_mask or stack_mask bits to be tracked.
4323 * It means the backtracking missed the spot where
4324 * particular register was initialized with a constant.
4326 verbose(env, "BUG backtracking idx %d\n", i);
4327 WARN_ONCE(1, "verifier backtracking bug");
4335 for (fr = bt->frame; fr >= 0; fr--) {
4336 func = st->frame[fr];
4337 bitmap_from_u64(mask, bt_frame_reg_mask(bt, fr));
4338 for_each_set_bit(i, mask, 32) {
4339 reg = &func->regs[i];
4340 if (reg->type != SCALAR_VALUE) {
4341 bt_clear_frame_reg(bt, fr, i);
4345 bt_clear_frame_reg(bt, fr, i);
4347 reg->precise = true;
4350 bitmap_from_u64(mask, bt_frame_stack_mask(bt, fr));
4351 for_each_set_bit(i, mask, 64) {
4352 if (i >= func->allocated_stack / BPF_REG_SIZE) {
4353 /* the sequence of instructions:
 * 2: (bf) r3 = r10
4355 * 3: (7b) *(u64 *)(r3 -8) = r0
4356 * 4: (79) r4 = *(u64 *)(r10 -8)
4357 * doesn't contain jmps. It's backtracked
4358 * as a single block.
4359 * During backtracking insn 3 is not recognized as
4360 * stack access, so at the end of backtracking
4361 * stack slot fp-8 is still marked in stack_mask.
4362 * However the parent state may not have accessed
4363 * fp-8 and it's "unallocated" stack space.
4364 * In such a case, fall back to conservative marking.
4366 mark_all_scalars_precise(env, env->cur_state);
4371 if (!is_spilled_scalar_reg(&func->stack[i])) {
4372 bt_clear_frame_slot(bt, fr, i);
4375 reg = &func->stack[i].spilled_ptr;
4377 bt_clear_frame_slot(bt, fr, i);
4379 reg->precise = true;
4381 if (env->log.level & BPF_LOG_LEVEL2) {
4382 fmt_reg_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4383 bt_frame_reg_mask(bt, fr));
4384 verbose(env, "mark_precise: frame%d: parent state regs=%s ",
4385 fr, env->tmp_str_buf);
4386 fmt_stack_mask(env->tmp_str_buf, TMP_STR_BUF_LEN,
4387 bt_frame_stack_mask(bt, fr));
4388 verbose(env, "stack=%s: ", env->tmp_str_buf);
4389 print_verifier_state(env, func, true);
4396 subseq_idx = first_idx;
4397 last_idx = st->last_insn_idx;
4398 first_idx = st->first_insn_idx;
4401 /* if we still have requested precise regs or slots, we missed
4402 * something (e.g., stack access through non-r10 register), so
4403 * fall back to marking all precise
4405 if (!bt_empty(bt)) {
4406 mark_all_scalars_precise(env, env->cur_state);
4413 int mark_chain_precision(struct bpf_verifier_env *env, int regno)
4415 return __mark_chain_precision(env, regno);
4418 /* mark_chain_precision_batch() assumes that env->bt is set in the caller to
4419 * desired reg and stack masks across all relevant frames
4421 static int mark_chain_precision_batch(struct bpf_verifier_env *env)
4423 return __mark_chain_precision(env, -1);
4426 static bool is_spillable_regtype(enum bpf_reg_type type)
4428 switch (base_type(type)) {
4429 case PTR_TO_MAP_VALUE:
4433 case PTR_TO_PACKET_META:
4434 case PTR_TO_PACKET_END:
4435 case PTR_TO_FLOW_KEYS:
4436 case CONST_PTR_TO_MAP:
4438 case PTR_TO_SOCK_COMMON:
4439 case PTR_TO_TCP_SOCK:
4440 case PTR_TO_XDP_SOCK:
4445 case PTR_TO_MAP_KEY:
4452 /* Does this register contain a constant zero? */
4453 static bool register_is_null(struct bpf_reg_state *reg)
4455 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
4458 static bool register_is_const(struct bpf_reg_state *reg)
4460 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
4463 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
4465 return tnum_is_unknown(reg->var_off) &&
4466 reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
4467 reg->umin_value == 0 && reg->umax_value == U64_MAX &&
4468 reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
4469 reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
4472 static bool register_is_bounded(struct bpf_reg_state *reg)
4474 return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
4477 static bool __is_pointer_value(bool allow_ptr_leaks,
4478 const struct bpf_reg_state *reg)
4480 if (allow_ptr_leaks)
4483 return reg->type != SCALAR_VALUE;
4486 /* Copy src state preserving dst->parent and dst->live fields */
4487 static void copy_register_state(struct bpf_reg_state *dst, const struct bpf_reg_state *src)
4489 struct bpf_reg_state *parent = dst->parent;
4490 enum bpf_reg_liveness live = dst->live;
4493 dst->parent = parent;
4497 static void save_register_state(struct bpf_func_state *state,
4498 int spi, struct bpf_reg_state *reg,
4503 copy_register_state(&state->stack[spi].spilled_ptr, reg);
4504 if (size == BPF_REG_SIZE)
4505 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4507 for (i = BPF_REG_SIZE; i > BPF_REG_SIZE - size; i--)
4508 state->stack[spi].slot_type[i - 1] = STACK_SPILL;
4510 /* size < 8 bytes spill */
4512 scrub_spilled_slot(&state->stack[spi].slot_type[i - 1]);
4515 static bool is_bpf_st_mem(struct bpf_insn *insn)
4517 return BPF_CLASS(insn->code) == BPF_ST && BPF_MODE(insn->code) == BPF_MEM;
4520 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers;
4521 * stack boundary and alignment are checked in check_mem_access()
4523 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
4524 /* stack frame we're writing to */
4525 struct bpf_func_state *state,
4526 int off, int size, int value_regno,
4529 struct bpf_func_state *cur; /* state of the current function */
4530 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
4531 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4532 struct bpf_reg_state *reg = NULL;
4533 u32 dst_reg = insn->dst_reg;
4535 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
4536 * so it's aligned access and [off, off + size) are within stack limits
4538 if (!env->allow_ptr_leaks &&
4539 is_spilled_reg(&state->stack[spi]) &&
4540 size != BPF_REG_SIZE) {
4541 verbose(env, "attempt to corrupt spilled pointer on stack\n");
4545 cur = env->cur_state->frame[env->cur_state->curframe];
4546 if (value_regno >= 0)
4547 reg = &cur->regs[value_regno];
4548 if (!env->bypass_spec_v4) {
4549 bool sanitize = reg && is_spillable_regtype(reg->type);
4551 for (i = 0; i < size; i++) {
4552 u8 type = state->stack[spi].slot_type[i];
4554 if (type != STACK_MISC && type != STACK_ZERO) {
4561 env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
4564 err = destroy_if_dynptr_stack_slot(env, state, spi);
4568 mark_stack_slot_scratched(env, spi);
4569 if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
4570 !register_is_null(reg) && env->bpf_capable) {
4571 if (dst_reg != BPF_REG_FP) {
4572 /* The backtracking logic can only recognize explicit
4573 * stack slot address like [fp - 8]. Other spills of a
4574 * scalar via a different register have to be conservative.
4575 * Backtrack from here and mark all registers as precise
4576 * that contributed into 'reg' being a constant.
4578 err = mark_chain_precision(env, value_regno);
4582 save_register_state(state, spi, reg, size);
4583 /* Break the relation on a narrowing spill. */
4584 if (fls64(reg->umax_value) > BITS_PER_BYTE * size)
4585 state->stack[spi].spilled_ptr.id = 0;
4586 } else if (!reg && !(off % BPF_REG_SIZE) && is_bpf_st_mem(insn) &&
4587 insn->imm != 0 && env->bpf_capable) {
4588 struct bpf_reg_state fake_reg = {};
4590 __mark_reg_known(&fake_reg, insn->imm);
4591 fake_reg.type = SCALAR_VALUE;
4592 save_register_state(state, spi, &fake_reg, size);
4593 } else if (reg && is_spillable_regtype(reg->type)) {
4594 /* register containing pointer is being spilled into stack */
4595 if (size != BPF_REG_SIZE) {
4596 verbose_linfo(env, insn_idx, "; ");
4597 verbose(env, "invalid size of register spill\n");
4600 if (state != cur && reg->type == PTR_TO_STACK) {
4601 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
4604 save_register_state(state, spi, reg, size);
4606 u8 type = STACK_MISC;
4608 /* regular write of data into stack destroys any spilled ptr */
4609 state->stack[spi].spilled_ptr.type = NOT_INIT;
4610 /* Mark slots as STACK_MISC if they belonged to spilled ptr/dynptr/iter. */
4611 if (is_stack_slot_special(&state->stack[spi]))
4612 for (i = 0; i < BPF_REG_SIZE; i++)
4613 scrub_spilled_slot(&state->stack[spi].slot_type[i]);
4615 /* only mark the slot as written if all 8 bytes were written
4616 * otherwise read propagation may incorrectly stop too soon
4617 * when stack slots are partially written.
4618 * This heuristic means that read propagation will be
4619 * conservative, since it will add reg_live_read marks
4620 * to stack slots all the way to the first state when a program
4621 * writes+reads less than 8 bytes
4623 if (size == BPF_REG_SIZE)
4624 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
4626 /* when we zero initialize stack slots mark them as such */
4627 if ((reg && register_is_null(reg)) ||
4628 (!reg && is_bpf_st_mem(insn) && insn->imm == 0)) {
4629 /* backtracking doesn't work for STACK_ZERO yet. */
4630 err = mark_chain_precision(env, value_regno);
4636 /* Mark slots affected by this stack write. */
4637 for (i = 0; i < size; i++)
4638 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
4644 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
4645 * known to contain a variable offset.
4646 * This function checks whether the write is permitted and conservatively
4647 * tracks the effects of the write, considering that each stack slot in the
4648 * dynamic range is potentially written to.
4650 * 'off' includes 'regno->off'.
4651 * 'value_regno' can be -1, meaning that an unknown value is being written to the stack.
4654 * Spilled pointers in range are not marked as written because we don't know
4655 * what's going to be actually written. This means that read propagation for
4656 * future reads cannot be terminated by this write.
4658 * For privileged programs, uninitialized stack slots are considered
4659 * initialized by this write (even though we don't know exactly what offsets
4660 * are going to be written to). The idea is that we don't want the verifier to
4661 * reject future reads that access slots written to through variable offsets.
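/*
 * A hypothetical C-level sketch (names are illustrative) of a write that
 * takes this path:
 *
 *   char buf[16];
 *   __u32 idx = bpf_get_prandom_u32() & 7;
 *   buf[idx] = 0;   // store at fp-16+idx, i.e. a variable offset
 *
 * Each slot the write may touch (fp-16 through fp-9 here) is conservatively
 * treated as potentially written, but none of them is marked as written for
 * the purpose of read propagation.
 */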
4663 static int check_stack_write_var_off(struct bpf_verifier_env *env,
4664 /* func where register points to */
4665 struct bpf_func_state *state,
4666 int ptr_regno, int off, int size,
4667 int value_regno, int insn_idx)
4669 struct bpf_func_state *cur; /* state of the current function */
4670 int min_off, max_off;
4672 struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
4673 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
4674 bool writing_zero = false;
4675 /* set if the fact that we're writing a zero is used to let any
4676 * stack slots remain STACK_ZERO
4678 bool zero_used = false;
4680 cur = env->cur_state->frame[env->cur_state->curframe];
4681 ptr_reg = &cur->regs[ptr_regno];
4682 min_off = ptr_reg->smin_value + off;
4683 max_off = ptr_reg->smax_value + off + size;
4684 if (value_regno >= 0)
4685 value_reg = &cur->regs[value_regno];
4686 if ((value_reg && register_is_null(value_reg)) ||
4687 (!value_reg && is_bpf_st_mem(insn) && insn->imm == 0))
4688 writing_zero = true;
4690 for (i = min_off; i < max_off; i++) {
4694 err = destroy_if_dynptr_stack_slot(env, state, spi);
4699 /* Variable offset writes destroy any spilled pointers in range. */
4700 for (i = min_off; i < max_off; i++) {
4701 u8 new_type, *stype;
4705 spi = slot / BPF_REG_SIZE;
4706 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4707 mark_stack_slot_scratched(env, spi);
4709 if (!env->allow_ptr_leaks && *stype != STACK_MISC && *stype != STACK_ZERO) {
4710 /* Reject the write if range we may write to has not
4711 * been initialized beforehand. If we didn't reject
4712 * here, the ptr status would be erased below (even
4713 * though not all slots are actually overwritten),
4714 * possibly opening the door to leaks.
4716 * We do however catch STACK_INVALID case below, and
4717 * only allow reading possibly uninitialized memory
4718 * later for CAP_PERFMON, as the write may not happen to that slot.
4721 verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
4726 /* Erase all spilled pointers. */
4727 state->stack[spi].spilled_ptr.type = NOT_INIT;
4729 /* Update the slot type. */
4730 new_type = STACK_MISC;
4731 if (writing_zero && *stype == STACK_ZERO) {
4732 new_type = STACK_ZERO;
4735 /* If the slot is STACK_INVALID, we check whether it's OK to
4736 * pretend that it will be initialized by this write. The slot
4737 * might not actually be written to, and so if we mark it as
4738 * initialized future reads might leak uninitialized memory.
4739 * For privileged programs, we will accept such reads to slots
4740 * that may or may not be written because, if we rejected
4741 * them, the error would be too confusing.
4743 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
4744 verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
4751 /* backtracking doesn't work for STACK_ZERO yet. */
4752 err = mark_chain_precision(env, value_regno);
4759 /* When register 'dst_regno' is assigned some values from stack[min_off,
4760 * max_off), we set the register's type according to the types of the
4761 * respective stack slots. If all the stack values are known to be zeros, then
4762 * so is the destination reg. Otherwise, the register is considered to be
4763 * SCALAR. This function does not deal with register filling; the caller must
4764 * ensure that all spilled registers in the stack range have been marked as read.
4767 static void mark_reg_stack_read(struct bpf_verifier_env *env,
4768 /* func where src register points to */
4769 struct bpf_func_state *ptr_state,
4770 int min_off, int max_off, int dst_regno)
4772 struct bpf_verifier_state *vstate = env->cur_state;
4773 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4778 for (i = min_off; i < max_off; i++) {
4780 spi = slot / BPF_REG_SIZE;
4781 mark_stack_slot_scratched(env, spi);
4782 stype = ptr_state->stack[spi].slot_type;
4783 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
4787 if (zeros == max_off - min_off) {
4788 /* any access_size read into register is zero extended,
4789 * so the whole register == const_zero
4791 __mark_reg_const_zero(&state->regs[dst_regno]);
4792 /* backtracking doesn't support STACK_ZERO yet,
4793 * so mark it precise here, so that later
4794 * backtracking can stop here.
4795 * Backtracking may not need this if this register
4796 * doesn't participate in pointer adjustment.
4797 * Forward propagation of precise flag is not
4798 * necessary either. This mark is only to stop
4799 * backtracking. Any register that contributed
4800 * to const 0 was marked precise before spill.
4802 state->regs[dst_regno].precise = true;
4804 /* have read misc data from the stack */
4805 mark_reg_unknown(env, state->regs, dst_regno);
4807 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4810 /* Read the stack at 'off' and put the results into the register indicated by
4811 * 'dst_regno'. It handles reg filling if the addressed stack slot is a spilled register.
4814 * 'dst_regno' can be -1, meaning that the read value is not going to a register.
4817 * The access is assumed to be within the current stack bounds.
4819 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
4820 /* func where src register points to */
4821 struct bpf_func_state *reg_state,
4822 int off, int size, int dst_regno)
4824 struct bpf_verifier_state *vstate = env->cur_state;
4825 struct bpf_func_state *state = vstate->frame[vstate->curframe];
4826 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
4827 struct bpf_reg_state *reg;
4830 stype = reg_state->stack[spi].slot_type;
4831 reg = &reg_state->stack[spi].spilled_ptr;
4833 mark_stack_slot_scratched(env, spi);
4835 if (is_spilled_reg(&reg_state->stack[spi])) {
4838 for (i = BPF_REG_SIZE - 1; i > 0 && stype[i - 1] == STACK_SPILL; i--)
4841 if (size != BPF_REG_SIZE || spill_size != BPF_REG_SIZE) {
4842 if (reg->type != SCALAR_VALUE) {
4843 verbose_linfo(env, env->insn_idx, "; ");
4844 verbose(env, "invalid size of register fill\n");
4848 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4852 if (!(off % BPF_REG_SIZE) && size == spill_size) {
4853 /* The earlier check_reg_arg() has decided the
4854 * subreg_def for this insn. Save it first.
4856 s32 subreg_def = state->regs[dst_regno].subreg_def;
4858 copy_register_state(&state->regs[dst_regno], reg);
4859 state->regs[dst_regno].subreg_def = subreg_def;
4861 for (i = 0; i < size; i++) {
4862 type = stype[(slot - i) % BPF_REG_SIZE];
4863 if (type == STACK_SPILL)
4865 if (type == STACK_MISC)
4867 if (type == STACK_INVALID && env->allow_uninit_stack)
4869 verbose(env, "invalid read from stack off %d+%d size %d\n",
4873 mark_reg_unknown(env, state->regs, dst_regno);
4875 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4879 if (dst_regno >= 0) {
4880 /* restore register state from stack */
4881 copy_register_state(&state->regs[dst_regno], reg);
4882 /* mark reg as written since spilled pointer state likely
4883 * has its liveness marks cleared by is_state_visited()
4884 * which resets stack/reg liveness for state transitions
4886 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
4887 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
4888 /* If dst_regno==-1, the caller is asking us whether
4889 * it is acceptable to use this value as a SCALAR_VALUE instead of a pointer.
4891 * We must not allow unprivileged callers to do that
4892 * with spilled pointers.
4894 verbose(env, "leaking pointer from stack off %d\n",
4898 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4900 for (i = 0; i < size; i++) {
4901 type = stype[(slot - i) % BPF_REG_SIZE];
4902 if (type == STACK_MISC)
4904 if (type == STACK_ZERO)
4906 if (type == STACK_INVALID && env->allow_uninit_stack)
4908 verbose(env, "invalid read from stack off %d+%d size %d\n",
4912 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
4914 mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
4919 enum bpf_access_src {
4920 ACCESS_DIRECT = 1, /* the access is performed by an instruction */
4921 ACCESS_HELPER = 2, /* the access is performed by a helper */
4924 static int check_stack_range_initialized(struct bpf_verifier_env *env,
4925 int regno, int off, int access_size,
4926 bool zero_size_allowed,
4927 enum bpf_access_src type,
4928 struct bpf_call_arg_meta *meta);
4930 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
4932 return cur_regs(env) + regno;
4935 /* Read the stack at 'ptr_regno + off' and put the result into the register
4937 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
4938 * but not its variable offset.
4939 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
4941 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
4942 * filling registers (i.e. reads of spilled register cannot be detected when
4943 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
4944 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
4945 * offset; for a fixed offset check_stack_read_fixed_off should be used instead.
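/*
 * A hypothetical sketch (registers are illustrative):
 *
 *   r2 = r10
 *   r2 += -16
 *   r2 += r5                // r5 is a bounded scalar, so the offset varies
 *   r1 = *(u8 *)(r2 + 0)
 *
 * The whole [min_off, max_off + size) range is first checked for
 * initialization, and r1 is then conservatively marked SCALAR_VALUE (or
 * const zero if every byte in that range is known to be STACK_ZERO).
 */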
4948 static int check_stack_read_var_off(struct bpf_verifier_env *env,
4949 int ptr_regno, int off, int size, int dst_regno)
4951 /* The state of the source register. */
4952 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4953 struct bpf_func_state *ptr_state = func(env, reg);
4955 int min_off, max_off;
4957 /* Note that we pass a NULL meta, so raw access will not be permitted.
4959 err = check_stack_range_initialized(env, ptr_regno, off, size,
4960 false, ACCESS_DIRECT, NULL);
4964 min_off = reg->smin_value + off;
4965 max_off = reg->smax_value + off;
4966 mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
4970 /* check_stack_read dispatches to check_stack_read_fixed_off or
4971 * check_stack_read_var_off.
4973 * The caller must ensure that the offset falls within the allocated stack bounds.
4976 * 'dst_regno' is a register which will receive the value from the stack. It
4977 * can be -1, meaning that the read value is not going to a register.
4979 static int check_stack_read(struct bpf_verifier_env *env,
4980 int ptr_regno, int off, int size,
4983 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
4984 struct bpf_func_state *state = func(env, reg);
4986 /* Some accesses are only permitted with a static offset. */
4987 bool var_off = !tnum_is_const(reg->var_off);
4989 /* The offset is required to be static when reads don't go to a
4990 * register, in order to not leak pointers (see
4991 * check_stack_read_fixed_off).
4993 if (dst_regno < 0 && var_off) {
4996 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4997 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
5001 /* Variable offset is prohibited for unprivileged mode for simplicity
5002 * since it requires corresponding support in Spectre masking for stack
5003 * ALU. See also retrieve_ptr_limit(). The check in
5004 * check_stack_access_for_ptr_arithmetic() called by
5005 * adjust_ptr_min_max_vals() prevents users from creating stack pointers
5006 * with variable offsets, therefore no check is required here. Further,
5007 * just checking it here would be insufficient as speculative stack
5008 * writes could still lead to unsafe speculative behaviour.
5011 off += reg->var_off.value;
5012 err = check_stack_read_fixed_off(env, state, off, size,
5015 /* Variable offset stack reads need more conservative handling
5016 * than fixed offset ones. Note that dst_regno >= 0 on this branch.
5019 err = check_stack_read_var_off(env, ptr_regno, off, size,
5026 /* check_stack_write dispatches to check_stack_write_fixed_off or
5027 * check_stack_write_var_off.
5029 * 'ptr_regno' is the register used as a pointer into the stack.
5030 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
5031 * 'value_regno' is the register whose value we're writing to the stack. It can
5032 * be -1, meaning that we're not writing from a register.
5034 * The caller must ensure that the offset falls within the maximum stack size.
5036 static int check_stack_write(struct bpf_verifier_env *env,
5037 int ptr_regno, int off, int size,
5038 int value_regno, int insn_idx)
5040 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
5041 struct bpf_func_state *state = func(env, reg);
5044 if (tnum_is_const(reg->var_off)) {
5045 off += reg->var_off.value;
5046 err = check_stack_write_fixed_off(env, state, off, size,
5047 value_regno, insn_idx);
5049 /* Variable offset stack writes need more conservative handling
5050 * than fixed offset ones.
5052 err = check_stack_write_var_off(env, state,
5053 ptr_regno, off, size,
5054 value_regno, insn_idx);
5059 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
5060 int off, int size, enum bpf_access_type type)
5062 struct bpf_reg_state *regs = cur_regs(env);
5063 struct bpf_map *map = regs[regno].map_ptr;
5064 u32 cap = bpf_map_flags_to_cap(map);
5066 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
5067 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
5068 map->value_size, off, size);
5072 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
5073 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
5074 map->value_size, off, size);
5081 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
5082 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
5083 int off, int size, u32 mem_size,
5084 bool zero_size_allowed)
5086 bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
5087 struct bpf_reg_state *reg;
5089 if (off >= 0 && size_ok && (u64)off + size <= mem_size)
5092 reg = &cur_regs(env)[regno];
5093 switch (reg->type) {
5094 case PTR_TO_MAP_KEY:
5095 verbose(env, "invalid access to map key, key_size=%d off=%d size=%d\n",
5096 mem_size, off, size);
5098 case PTR_TO_MAP_VALUE:
5099 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
5100 mem_size, off, size);
5103 case PTR_TO_PACKET_META:
5104 case PTR_TO_PACKET_END:
5105 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
5106 off, size, regno, reg->id, off, mem_size);
5110 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
5111 mem_size, off, size);
5117 /* check read/write into a memory region with possible variable offset */
5118 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
5119 int off, int size, u32 mem_size,
5120 bool zero_size_allowed)
5122 struct bpf_verifier_state *vstate = env->cur_state;
5123 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5124 struct bpf_reg_state *reg = &state->regs[regno];
5127 /* We may have adjusted the register pointing to memory region, so we
5128 * need to try adding each of min_value and max_value to off
5129 * to make sure our theoretical access will be safe.
5131 * The minimum value is only important with signed
5132 * comparisons where we can't assume the floor of a
5133 * value is 0. If we are using signed variables for our
5134 * indexes we need to make sure that whatever we use
5135 * will have a set floor within our range.
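 * For example, with off = 4, size = 4 and a register known to be in
 * [0, 100], both ends of the window are checked: the lowest access starts
 * at 4 + 0 and the highest one at 4 + 100, and both must fit in mem_size.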
5137 if (reg->smin_value < 0 &&
5138 (reg->smin_value == S64_MIN ||
5139 (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
5140 reg->smin_value + off < 0)) {
5141 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5145 err = __check_mem_access(env, regno, reg->smin_value + off, size,
5146 mem_size, zero_size_allowed);
5148 verbose(env, "R%d min value is outside of the allowed memory range\n",
5153 /* If we haven't set a max value then we need to bail since we can't be
5154 * sure we won't do bad things.
5155 * If reg->umax_value + off could overflow, treat that as unbounded too.
5157 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
5158 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
5162 err = __check_mem_access(env, regno, reg->umax_value + off, size,
5163 mem_size, zero_size_allowed);
5165 verbose(env, "R%d max value is outside of the allowed memory range\n",
5173 static int __check_ptr_off_reg(struct bpf_verifier_env *env,
5174 const struct bpf_reg_state *reg, int regno,
5177 /* Access to this pointer-typed register or passing it to a helper
5178 * is only allowed in its original, unmodified form.
5182 verbose(env, "negative offset %s ptr R%d off=%d disallowed\n",
5183 reg_type_str(env, reg->type), regno, reg->off);
5187 if (!fixed_off_ok && reg->off) {
5188 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n",
5189 reg_type_str(env, reg->type), regno, reg->off);
5193 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5196 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5197 verbose(env, "variable %s access var_off=%s disallowed\n",
5198 reg_type_str(env, reg->type), tn_buf);
5205 int check_ptr_off_reg(struct bpf_verifier_env *env,
5206 const struct bpf_reg_state *reg, int regno)
5208 return __check_ptr_off_reg(env, reg, regno, false);
5211 static int map_kptr_match_type(struct bpf_verifier_env *env,
5212 struct btf_field *kptr_field,
5213 struct bpf_reg_state *reg, u32 regno)
5215 const char *targ_name = btf_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
5217 const char *reg_name = "";
5219 if (btf_is_kernel(reg->btf)) {
5220 perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
5222 /* Only unreferenced case accepts untrusted pointers */
5223 if (kptr_field->type == BPF_KPTR_UNREF)
5224 perm_flags |= PTR_UNTRUSTED;
5226 perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
5229 if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
5232 /* We need to verify reg->type and reg->btf, before accessing reg->btf */
5233 reg_name = btf_type_name(reg->btf, reg->btf_id);
5235 /* For ref_ptr case, release function check should ensure we get one
5236 * referenced PTR_TO_BTF_ID, and that its fixed offset is 0. For the
5237 * normal store of unreferenced kptr, we must ensure var_off is zero.
5238 * Since ref_ptr cannot be accessed directly by BPF insns, checks for
5239 * reg->off and reg->ref_obj_id are not needed here.
5241 if (__check_ptr_off_reg(env, reg, regno, true))
5244 /* A full type match is needed, as BTF can be vmlinux, module or prog BTF, and
5245 * we also need to take into account the reg->off.
5247 * We want to support cases like:
5255 * v = func(); // PTR_TO_BTF_ID
5256 * val->foo = v; // reg->off is zero, btf and btf_id match type
5257 * val->bar = &v->br; // reg->off is still zero, but we need to retry with
5258 * // first member type of struct after comparison fails
5259 * val->baz = &v->bz; // reg->off is non-zero, so struct needs to be walked
5262 * In the kptr_ref case, check_func_arg_reg_off already ensures reg->off
5263 * is zero. We must also ensure that btf_struct_ids_match does not walk
5264 * the struct to match type against first member of struct, i.e. reject
5265 * second case from above. Hence, when type is BPF_KPTR_REF, we set
5266 * strict mode to true for type match.
5268 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
5269 kptr_field->kptr.btf, kptr_field->kptr.btf_id,
5270 kptr_field->type == BPF_KPTR_REF))
5274 verbose(env, "invalid kptr access, R%d type=%s%s ", regno,
5275 reg_type_str(env, reg->type), reg_name);
5276 verbose(env, "expected=%s%s", reg_type_str(env, PTR_TO_BTF_ID), targ_name);
5277 if (kptr_field->type == BPF_KPTR_UNREF)
5278 verbose(env, " or %s%s\n", reg_type_str(env, PTR_TO_BTF_ID | PTR_UNTRUSTED),
5285 /* Non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
5286 * can dereference RCU protected pointers and the result is PTR_TRUSTED.
5288 static bool in_rcu_cs(struct bpf_verifier_env *env)
5290 return env->cur_state->active_rcu_lock ||
5291 env->cur_state->active_lock.ptr ||
5292 !env->prog->aux->sleepable;
5295 /* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
5296 BTF_SET_START(rcu_protected_types)
5297 BTF_ID(struct, prog_test_ref_kfunc)
5298 BTF_ID(struct, cgroup)
5299 BTF_ID(struct, bpf_cpumask)
5300 BTF_ID(struct, task_struct)
5301 BTF_SET_END(rcu_protected_types)
5303 static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
5305 if (!btf_is_kernel(btf))
5307 return btf_id_set_contains(&rcu_protected_types, btf_id);
5310 static bool rcu_safe_kptr(const struct btf_field *field)
5312 const struct btf_field_kptr *kptr = &field->kptr;
5314 return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
5317 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
5318 int value_regno, int insn_idx,
5319 struct btf_field *kptr_field)
5321 struct bpf_insn *insn = &env->prog->insnsi[insn_idx];
5322 int class = BPF_CLASS(insn->code);
5323 struct bpf_reg_state *val_reg;
5325 /* Things we already checked for in check_map_access and caller:
5326 * - Reject cases where variable offset may touch kptr
5327 * - size of access (must be BPF_DW)
5328 * - tnum_is_const(reg->var_off)
5329 * - kptr_field->offset == off + reg->var_off.value
5331 /* Only BPF_[LDX,STX,ST] | BPF_MEM | BPF_DW is supported */
5332 if (BPF_MODE(insn->code) != BPF_MEM) {
5333 verbose(env, "kptr in map can only be accessed using BPF_MEM instruction mode\n");
5337 /* We only allow loading referenced kptr, since it will be marked as
5338 * untrusted, similar to unreferenced kptr.
5340 if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
5341 verbose(env, "store to referenced kptr disallowed\n");
5345 if (class == BPF_LDX) {
5346 val_reg = reg_state(env, value_regno);
5347 /* We can simply mark the value_regno receiving the pointer
5348 * value from map as PTR_TO_BTF_ID, with the correct type.
5350 mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
5351 kptr_field->kptr.btf_id,
5352 rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
5353 PTR_MAYBE_NULL | MEM_RCU :
5354 PTR_MAYBE_NULL | PTR_UNTRUSTED);
5355 /* For mark_ptr_or_null_reg */
5356 val_reg->id = ++env->id_gen;
5357 } else if (class == BPF_STX) {
5358 val_reg = reg_state(env, value_regno);
5359 if (!register_is_null(val_reg) &&
5360 map_kptr_match_type(env, kptr_field, val_reg, value_regno))
5362 } else if (class == BPF_ST) {
5364 verbose(env, "BPF_ST imm must be 0 when storing to kptr at off=%u\n",
5365 kptr_field->offset);
5369 verbose(env, "kptr in map can only be accessed using BPF_LDX/BPF_STX/BPF_ST\n");
5375 /* check read/write into a map element with possible variable offset */
5376 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
5377 int off, int size, bool zero_size_allowed,
5378 enum bpf_access_src src)
5380 struct bpf_verifier_state *vstate = env->cur_state;
5381 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5382 struct bpf_reg_state *reg = &state->regs[regno];
5383 struct bpf_map *map = reg->map_ptr;
5384 struct btf_record *rec;
5387 err = check_mem_region_access(env, regno, off, size, map->value_size,
5392 if (IS_ERR_OR_NULL(map->record))
5395 for (i = 0; i < rec->cnt; i++) {
5396 struct btf_field *field = &rec->fields[i];
5397 u32 p = field->offset;
5399 /* If any part of a field can be touched by load/store, reject
5400 * this program. To check that [x1, x2) overlaps with [y1, y2),
5401 * it is sufficient to check x1 < y2 && y1 < x2.
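 * For example, a field at [8, 16) and an access at [12, 20) overlap
 * (8 < 20 && 12 < 16), while an access at [16, 24) does not, since
 * 16 < 16 is false.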
5403 if (reg->smin_value + off < p + btf_field_type_size(field->type) &&
5404 p < reg->umax_value + off + size) {
5405 switch (field->type) {
5406 case BPF_KPTR_UNREF:
5408 if (src != ACCESS_DIRECT) {
5409 verbose(env, "kptr cannot be accessed indirectly by helper\n");
5412 if (!tnum_is_const(reg->var_off)) {
5413 verbose(env, "kptr access cannot have variable offset\n");
5416 if (p != off + reg->var_off.value) {
5417 verbose(env, "kptr access misaligned expected=%u off=%llu\n",
5418 p, off + reg->var_off.value);
5421 if (size != bpf_size_to_bytes(BPF_DW)) {
5422 verbose(env, "kptr access size must be BPF_DW\n");
5427 verbose(env, "%s cannot be accessed directly by load/store\n",
5428 btf_field_type_name(field->type));
5436 #define MAX_PACKET_OFF 0xffff
5438 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
5439 const struct bpf_call_arg_meta *meta,
5440 enum bpf_access_type t)
5442 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
5444 switch (prog_type) {
5445 /* Program types with only direct read access go here! */
5446 case BPF_PROG_TYPE_LWT_IN:
5447 case BPF_PROG_TYPE_LWT_OUT:
5448 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
5449 case BPF_PROG_TYPE_SK_REUSEPORT:
5450 case BPF_PROG_TYPE_FLOW_DISSECTOR:
5451 case BPF_PROG_TYPE_CGROUP_SKB:
5456 /* Program types with direct read + write access go here! */
5457 case BPF_PROG_TYPE_SCHED_CLS:
5458 case BPF_PROG_TYPE_SCHED_ACT:
5459 case BPF_PROG_TYPE_XDP:
5460 case BPF_PROG_TYPE_LWT_XMIT:
5461 case BPF_PROG_TYPE_SK_SKB:
5462 case BPF_PROG_TYPE_SK_MSG:
5464 return meta->pkt_access;
5466 env->seen_direct_write = true;
5469 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
5471 env->seen_direct_write = true;
5480 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
5481 int size, bool zero_size_allowed)
5483 struct bpf_reg_state *regs = cur_regs(env);
5484 struct bpf_reg_state *reg = &regs[regno];
5487 /* We may have added a variable offset to the packet pointer; but any
5488 * reg->range we have comes after that. We are only checking the fixed offset.
5492 /* We don't allow negative numbers, because we aren't tracking enough
5493 * detail to prove they're safe.
5495 if (reg->smin_value < 0) {
5496 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5501 err = reg->range < 0 ? -EINVAL :
5502 __check_mem_access(env, regno, off, size, reg->range,
5505 verbose(env, "R%d offset is outside of the packet\n", regno);
5509 /* __check_mem_access has made sure "off + size - 1" is within u16.
5510 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
5511 * otherwise find_good_pkt_pointers would have refused to set range info,
5512 * in which case __check_mem_access would have rejected this pkt access.
5513 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
5515 env->prog->aux->max_pkt_offset =
5516 max_t(u32, env->prog->aux->max_pkt_offset,
5517 off + reg->umax_value + size - 1);
5522 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
5523 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
5524 enum bpf_access_type t, enum bpf_reg_type *reg_type,
5525 struct btf **btf, u32 *btf_id)
5527 struct bpf_insn_access_aux info = {
5528 .reg_type = *reg_type,
5532 if (env->ops->is_valid_access &&
5533 env->ops->is_valid_access(off, size, t, env->prog, &info)) {
5534 /* A non zero info.ctx_field_size indicates that this field is a
5535 * candidate for later verifier transformation to load the whole
5536 * field and then apply a mask when accessed with a narrower
5537 * access than actual ctx access size. A zero info.ctx_field_size
5538 * will only allow for whole field access and rejects any other
5539 * type of narrower access.
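 * For example, a 1-byte load from a 4-byte context field that reports
 * ctx_field_size = 4 is accepted here and later rewritten by the verifier
 * into a load of the whole field followed by a shift and mask.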
5541 *reg_type = info.reg_type;
5543 if (base_type(*reg_type) == PTR_TO_BTF_ID) {
5545 *btf_id = info.btf_id;
5547 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
5549 /* remember the offset of last byte accessed in ctx */
5550 if (env->prog->aux->max_ctx_offset < off + size)
5551 env->prog->aux->max_ctx_offset = off + size;
5555 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
5559 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
5562 if (size < 0 || off < 0 ||
5563 (u64)off + size > sizeof(struct bpf_flow_keys)) {
5564 verbose(env, "invalid access to flow keys off=%d size=%d\n",
5571 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
5572 u32 regno, int off, int size,
5573 enum bpf_access_type t)
5575 struct bpf_reg_state *regs = cur_regs(env);
5576 struct bpf_reg_state *reg = &regs[regno];
5577 struct bpf_insn_access_aux info = {};
5580 if (reg->smin_value < 0) {
5581 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
5586 switch (reg->type) {
5587 case PTR_TO_SOCK_COMMON:
5588 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
5591 valid = bpf_sock_is_valid_access(off, size, t, &info);
5593 case PTR_TO_TCP_SOCK:
5594 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
5596 case PTR_TO_XDP_SOCK:
5597 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
5605 env->insn_aux_data[insn_idx].ctx_field_size =
5606 info.ctx_field_size;
5610 verbose(env, "R%d invalid %s access off=%d size=%d\n",
5611 regno, reg_type_str(env, reg->type), off, size);
5616 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
5618 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
5621 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
5623 const struct bpf_reg_state *reg = reg_state(env, regno);
5625 return reg->type == PTR_TO_CTX;
5628 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
5630 const struct bpf_reg_state *reg = reg_state(env, regno);
5632 return type_is_sk_pointer(reg->type);
5635 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
5637 const struct bpf_reg_state *reg = reg_state(env, regno);
5639 return type_is_pkt_pointer(reg->type);
5642 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
5644 const struct bpf_reg_state *reg = reg_state(env, regno);
5646 /* Separate from is_ctx_reg() since we still want to allow BPF_ST here. */
5647 return reg->type == PTR_TO_FLOW_KEYS;
5650 static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
5652 [PTR_TO_SOCKET] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
5653 [PTR_TO_SOCK_COMMON] = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
5654 [PTR_TO_TCP_SOCK] = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
5656 [CONST_PTR_TO_MAP] = btf_bpf_map_id,
5659 static bool is_trusted_reg(const struct bpf_reg_state *reg)
5661 /* A referenced register is always trusted. */
5662 if (reg->ref_obj_id)
5665 /* Types listed in the reg2btf_ids are always trusted */
5666 if (reg2btf_ids[base_type(reg->type)])
5669 /* If a register is not referenced, it is trusted if it has the
5670 * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
5671 * other type modifiers may be safe, but we elect to take an opt-in
5672 * approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are not.
5675 * Eventually, we should make PTR_TRUSTED the single source of truth
5676 * for whether a register is trusted.
5678 return type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS &&
5679 !bpf_type_has_unsafe_modifiers(reg->type);
5682 static bool is_rcu_reg(const struct bpf_reg_state *reg)
5684 return reg->type & MEM_RCU;
5687 static void clear_trusted_flags(enum bpf_type_flag *flag)
5689 *flag &= ~(BPF_REG_TRUSTED_MODIFIERS | MEM_RCU);
5692 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
5693 const struct bpf_reg_state *reg,
5694 int off, int size, bool strict)
5696 struct tnum reg_off;
5699 /* Byte size accesses are always allowed. */
5700 if (!strict || size == 1)
5703 /* For platforms that do not have a Kconfig enabling
5704 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
5705 * NET_IP_ALIGN is universally set to '2'. And on platforms
5706 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
5707 * to this code only in strict mode where we want to emulate
5708 * the NET_IP_ALIGN==2 checking. Therefore use an
5709 * unconditional IP align value of '2'.
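 * For example, under strict alignment a 4-byte load at pkt + 14 (just past
 * a 14-byte Ethernet header) is fine because 2 + 14 = 16 is 4-byte aligned,
 * while the same load at pkt + 12 would be rejected since 2 + 12 = 14.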
5713 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
5714 if (!tnum_is_aligned(reg_off, size)) {
5717 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5719 "misaligned packet access off %d+%s+%d+%d size %d\n",
5720 ip_align, tn_buf, reg->off, off, size);
5727 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
5728 const struct bpf_reg_state *reg,
5729 const char *pointer_desc,
5730 int off, int size, bool strict)
5732 struct tnum reg_off;
5734 /* Byte size accesses are always allowed. */
5735 if (!strict || size == 1)
5738 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
5739 if (!tnum_is_aligned(reg_off, size)) {
5742 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5743 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
5744 pointer_desc, tn_buf, reg->off, off, size);
5751 static int check_ptr_alignment(struct bpf_verifier_env *env,
5752 const struct bpf_reg_state *reg, int off,
5753 int size, bool strict_alignment_once)
5755 bool strict = env->strict_alignment || strict_alignment_once;
5756 const char *pointer_desc = "";
5758 switch (reg->type) {
5760 case PTR_TO_PACKET_META:
5761 /* Special case, because of NET_IP_ALIGN. Given metadata sits
5762 * right in front, treat it the very same way.
5764 return check_pkt_ptr_alignment(env, reg, off, size, strict);
5765 case PTR_TO_FLOW_KEYS:
5766 pointer_desc = "flow keys ";
5768 case PTR_TO_MAP_KEY:
5769 pointer_desc = "key ";
5771 case PTR_TO_MAP_VALUE:
5772 pointer_desc = "value ";
5775 pointer_desc = "context ";
5778 pointer_desc = "stack ";
5779 /* The stack spill tracking logic in check_stack_write_fixed_off()
5780 * and check_stack_read_fixed_off() relies on stack accesses being aligned.
5786 pointer_desc = "sock ";
5788 case PTR_TO_SOCK_COMMON:
5789 pointer_desc = "sock_common ";
5791 case PTR_TO_TCP_SOCK:
5792 pointer_desc = "tcp_sock ";
5794 case PTR_TO_XDP_SOCK:
5795 pointer_desc = "xdp_sock ";
5800 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
5804 /* starting from main bpf function walk all instructions of the function
5805 * and recursively walk all callees that the given function can call.
5806 * Ignore jump and exit insns.
5807 * Since recursion is prevented by check_cfg() this algorithm
5808 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
5810 static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
5812 struct bpf_subprog_info *subprog = env->subprog_info;
5813 struct bpf_insn *insn = env->prog->insnsi;
5814 int depth = 0, frame = 0, i, subprog_end;
5815 bool tail_call_reachable = false;
5816 int ret_insn[MAX_CALL_FRAMES];
5817 int ret_prog[MAX_CALL_FRAMES];
5820 i = subprog[idx].start;
5822 /* protect against potential stack overflow that might happen when
5823 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
5824 * depth for such case down to 256 so that the worst case scenario
5825 * would result in 8k stack size (32 which is tailcall limit * 256 = 8k).
5828 * To get the idea what might happen, see an example:
5829 * func1 -> sub rsp, 128
5830 * subfunc1 -> sub rsp, 256
5831 * tailcall1 -> add rsp, 256
5832 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
5833 * subfunc2 -> sub rsp, 64
5834 * subfunc22 -> sub rsp, 128
5835 * tailcall2 -> add rsp, 128
5836 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
5838 * tailcall will unwind the current stack frame but it will not get rid
5839 * of caller's stack as shown on the example above.
5841 if (idx && subprog[idx].has_tail_call && depth >= 256) {
5843 "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
5847 /* round up to 32 bytes, since this is the granularity
5848 * of interpreter stack size
5850 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
5851 if (depth > MAX_BPF_STACK) {
5852 verbose(env, "combined stack size of %d calls is %d. Too large\n",
5857 subprog_end = subprog[idx + 1].start;
5858 for (; i < subprog_end; i++) {
5859 int next_insn, sidx;
5861 if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
5863 /* remember insn and function to return to */
5864 ret_insn[frame] = i + 1;
5865 ret_prog[frame] = idx;
5867 /* find the callee */
5868 next_insn = i + insn[i].imm + 1;
5869 sidx = find_subprog(env, next_insn);
5871 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5875 if (subprog[sidx].is_async_cb) {
5876 if (subprog[sidx].has_tail_call) {
5877 verbose(env, "verifier bug. subprog has tail_call and async cb\n");
5880 /* async callbacks don't increase bpf prog stack size unless called directly */
5881 if (!bpf_pseudo_call(insn + i))
5887 if (subprog[idx].has_tail_call)
5888 tail_call_reachable = true;
5891 if (frame >= MAX_CALL_FRAMES) {
5892 verbose(env, "the call stack of %d frames is too deep !\n",
5898 /* if tail call got detected across bpf2bpf calls then mark each of the
5899 * currently present subprog frames as tail call reachable subprogs;
5900 * this info will be utilized by JIT so that we will be preserving the
5901 * tail call counter throughout bpf2bpf calls combined with tailcalls
5903 if (tail_call_reachable)
5904 for (j = 0; j < frame; j++)
5905 subprog[ret_prog[j]].tail_call_reachable = true;
5906 if (subprog[0].tail_call_reachable)
5907 env->prog->aux->tail_call_reachable = true;
5909 /* end of for() loop means the last insn of the 'subprog'
5910 * was reached. Doesn't matter whether it was JA or EXIT
5914 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
5916 i = ret_insn[frame];
5917 idx = ret_prog[frame];
5921 static int check_max_stack_depth(struct bpf_verifier_env *env)
5923 struct bpf_subprog_info *si = env->subprog_info;
5926 for (int i = 0; i < env->subprog_cnt; i++) {
5927 if (!i || si[i].is_async_cb) {
5928 ret = check_max_stack_depth_subprog(env, i);
5937 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
5938 static int get_callee_stack_depth(struct bpf_verifier_env *env,
5939 const struct bpf_insn *insn, int idx)
5941 int start = idx + insn->imm + 1, subprog;
5943 subprog = find_subprog(env, start);
5945 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
5949 return env->subprog_info[subprog].stack_depth;
5953 static int __check_buffer_access(struct bpf_verifier_env *env,
5954 const char *buf_info,
5955 const struct bpf_reg_state *reg,
5956 int regno, int off, int size)
5960 "R%d invalid %s buffer access: off=%d, size=%d\n",
5961 regno, buf_info, off, size);
5964 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
5967 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5969 "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
5970 regno, off, tn_buf);
5977 static int check_tp_buffer_access(struct bpf_verifier_env *env,
5978 const struct bpf_reg_state *reg,
5979 int regno, int off, int size)
5983 err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
5987 if (off + size > env->prog->aux->max_tp_access)
5988 env->prog->aux->max_tp_access = off + size;
5993 static int check_buffer_access(struct bpf_verifier_env *env,
5994 const struct bpf_reg_state *reg,
5995 int regno, int off, int size,
5996 bool zero_size_allowed,
5999 const char *buf_info = type_is_rdonly_mem(reg->type) ? "rdonly" : "rdwr";
6002 err = __check_buffer_access(env, buf_info, reg, regno, off, size);
6006 if (off + size > *max_access)
6007 *max_access = off + size;
6012 /* BPF architecture zero extends alu32 ops into 64-bit registers */
6013 static void zext_32_to_64(struct bpf_reg_state *reg)
6015 reg->var_off = tnum_subreg(reg->var_off);
6016 __reg_assign_32_into_64(reg);
6019 /* truncate register to smaller size (in bytes)
6020 * must be called with size < BPF_REG_SIZE
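 * For example, coercing a register known to hold the constant 0x1ff to
 * size 1 keeps only the low byte: var_off becomes the constant 0xff and
 * the unsigned bounds collapse to [0xff, 0xff].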
6022 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
6026 /* clear high bits in bit representation */
6027 reg->var_off = tnum_cast(reg->var_off, size);
6029 /* fix arithmetic bounds */
6030 mask = ((u64)1 << (size * 8)) - 1;
6031 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
6032 reg->umin_value &= mask;
6033 reg->umax_value &= mask;
6035 reg->umin_value = 0;
6036 reg->umax_value = mask;
6038 reg->smin_value = reg->umin_value;
6039 reg->smax_value = reg->umax_value;
6041 /* If size is smaller than a 32-bit register, the 32-bit register
6042 * values are also truncated, so we push the 64-bit bounds into
6043 * 32-bit bounds. Above they were already truncated to < 32 bits.
6047 __reg_combine_64_into_32(reg);
6050 static void set_sext64_default_val(struct bpf_reg_state *reg, int size)
6053 reg->smin_value = reg->s32_min_value = S8_MIN;
6054 reg->smax_value = reg->s32_max_value = S8_MAX;
6055 } else if (size == 2) {
6056 reg->smin_value = reg->s32_min_value = S16_MIN;
6057 reg->smax_value = reg->s32_max_value = S16_MAX;
6060 reg->smin_value = reg->s32_min_value = S32_MIN;
6061 reg->smax_value = reg->s32_max_value = S32_MAX;
6063 reg->umin_value = reg->u32_min_value = 0;
6064 reg->umax_value = U64_MAX;
6065 reg->u32_max_value = U32_MAX;
6066 reg->var_off = tnum_unknown;
6069 static void coerce_reg_to_size_sx(struct bpf_reg_state *reg, int size)
6071 s64 init_s64_max, init_s64_min, s64_max, s64_min, u64_cval;
6072 u64 top_smax_value, top_smin_value;
6073 u64 num_bits = size * 8;
6075 if (tnum_is_const(reg->var_off)) {
6076 u64_cval = reg->var_off.value;
6078 reg->var_off = tnum_const((s8)u64_cval);
6080 reg->var_off = tnum_const((s16)u64_cval);
6083 reg->var_off = tnum_const((s32)u64_cval);
6085 u64_cval = reg->var_off.value;
6086 reg->smax_value = reg->smin_value = u64_cval;
6087 reg->umax_value = reg->umin_value = u64_cval;
6088 reg->s32_max_value = reg->s32_min_value = u64_cval;
6089 reg->u32_max_value = reg->u32_min_value = u64_cval;
6093 top_smax_value = ((u64)reg->smax_value >> num_bits) << num_bits;
6094 top_smin_value = ((u64)reg->smin_value >> num_bits) << num_bits;
6096 if (top_smax_value != top_smin_value)
6099 /* find the s64_min and s64_max after sign extension */
6101 init_s64_max = (s8)reg->smax_value;
6102 init_s64_min = (s8)reg->smin_value;
6103 } else if (size == 2) {
6104 init_s64_max = (s16)reg->smax_value;
6105 init_s64_min = (s16)reg->smin_value;
6107 init_s64_max = (s32)reg->smax_value;
6108 init_s64_min = (s32)reg->smin_value;
6111 s64_max = max(init_s64_max, init_s64_min);
6112 s64_min = min(init_s64_max, init_s64_min);
6114 /* both of s64_max/s64_min positive or negative */
6115 if ((s64_max >= 0) == (s64_min >= 0)) {
6116 reg->smin_value = reg->s32_min_value = s64_min;
6117 reg->smax_value = reg->s32_max_value = s64_max;
6118 reg->umin_value = reg->u32_min_value = s64_min;
6119 reg->umax_value = reg->u32_max_value = s64_max;
6120 reg->var_off = tnum_range(s64_min, s64_max);
6125 set_sext64_default_val(reg, size);
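/* Illustrative worked example (made-up bounds) for the sign-extending
 * coercion above with size == 1: a range of [smin_value = 0x70,
 * smax_value = 0x90] has identical top bits, but after casting to s8 the
 * candidates are 112 and -112, which straddle zero, so the conservative
 * set_sext64_default_val() path applies and the bounds widen to
 * [S8_MIN, S8_MAX]. A range of [0x10, 0x70] keeps its exact bounds.
 */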
6128 static void set_sext32_default_val(struct bpf_reg_state *reg, int size)
6131 reg->s32_min_value = S8_MIN;
6132 reg->s32_max_value = S8_MAX;
6135 reg->s32_min_value = S16_MIN;
6136 reg->s32_max_value = S16_MAX;
6138 reg->u32_min_value = 0;
6139 reg->u32_max_value = U32_MAX;
6142 static void coerce_subreg_to_size_sx(struct bpf_reg_state *reg, int size)
6144 s32 init_s32_max, init_s32_min, s32_max, s32_min, u32_val;
6145 u32 top_smax_value, top_smin_value;
6146 u32 num_bits = size * 8;
6148 if (tnum_is_const(reg->var_off)) {
6149 u32_val = reg->var_off.value;
6151 reg->var_off = tnum_const((s8)u32_val);
6153 reg->var_off = tnum_const((s16)u32_val);
6155 u32_val = reg->var_off.value;
6156 reg->s32_min_value = reg->s32_max_value = u32_val;
6157 reg->u32_min_value = reg->u32_max_value = u32_val;
6161 top_smax_value = ((u32)reg->s32_max_value >> num_bits) << num_bits;
6162 top_smin_value = ((u32)reg->s32_min_value >> num_bits) << num_bits;
6164 if (top_smax_value != top_smin_value)
6167 /* find the s32_min and s32_max after sign extension */
6169 init_s32_max = (s8)reg->s32_max_value;
6170 init_s32_min = (s8)reg->s32_min_value;
6173 init_s32_max = (s16)reg->s32_max_value;
6174 init_s32_min = (s16)reg->s32_min_value;
6176 s32_max = max(init_s32_max, init_s32_min);
6177 s32_min = min(init_s32_max, init_s32_min);
6179 if ((s32_min >= 0) == (s32_max >= 0)) {
6180 reg->s32_min_value = s32_min;
6181 reg->s32_max_value = s32_max;
6182 reg->u32_min_value = (u32)s32_min;
6183 reg->u32_max_value = (u32)s32_max;
6188 set_sext32_default_val(reg, size);
6191 static bool bpf_map_is_rdonly(const struct bpf_map *map)
6193 /* A map is considered read-only if the following conditions are true:
6195 * 1) BPF program side cannot change any of the map content. The
6196 * BPF_F_RDONLY_PROG flag was set at map creation time and stays
6197 * in effect for the lifetime of the map.
6198 * 2) The map value(s) have been initialized from user space by a
6199 * loader and then "frozen", such that no new map update/delete
6200 * operations from syscall side are possible for the rest of
6201 * the map's lifetime from that point onwards.
6202 * 3) Any parallel/pending map update/delete operations from syscall
6203 * side have been completed. Only after that point, it's safe to
6204 * assume that map value(s) are immutable.
6206 return (map->map_flags & BPF_F_RDONLY_PROG) &&
6207 READ_ONCE(map->frozen) &&
6208 !bpf_map_write_active(map);
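/* A user-space sketch (libbpf based; "ro_map", key and val are placeholder
 * names used only for this comment) that satisfies conditions 1) and 2):
 *
 *	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_RDONLY_PROG);
 *	int fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "ro_map",
 *				sizeof(__u32), sizeof(__u64), 1, &opts);
 *	bpf_map_update_elem(fd, &key, &val, BPF_ANY);	// key/val: placeholders
 *	bpf_map_freeze(fd);	// no more syscall-side updates from here on
 *
 * Condition 3) is evaluated at verification time via bpf_map_write_active().
 */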
6211 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val,
6218 err = map->ops->map_direct_value_addr(map, &addr, off);
6221 ptr = (void *)(long)addr + off;
6225 *val = is_ldsx ? (s64)*(s8 *)ptr : (u64)*(u8 *)ptr;
6228 *val = is_ldsx ? (s64)*(s16 *)ptr : (u64)*(u16 *)ptr;
6231 *val = is_ldsx ? (s64)*(s32 *)ptr : (u64)*(u32 *)ptr;
6242 #define BTF_TYPE_SAFE_RCU(__type) __PASTE(__type, __safe_rcu)
6243 #define BTF_TYPE_SAFE_RCU_OR_NULL(__type) __PASTE(__type, __safe_rcu_or_null)
6244 #define BTF_TYPE_SAFE_TRUSTED(__type) __PASTE(__type, __safe_trusted)
6247 * Allowlist a few fields as RCU trusted or fully trusted.
6248 * This logic doesn't allow mixed tagging and will be removed once GCC supports
6252 /* RCU trusted: these fields are trusted in RCU CS and never NULL */
6253 BTF_TYPE_SAFE_RCU(struct task_struct) {
6254 const cpumask_t *cpus_ptr;
6255 struct css_set __rcu *cgroups;
6256 struct task_struct __rcu *real_parent;
6257 struct task_struct *group_leader;
6260 BTF_TYPE_SAFE_RCU(struct cgroup) {
6261 /* cgrp->kn is always accessible as documented in kernel/cgroup/cgroup.c */
6262 struct kernfs_node *kn;
6265 BTF_TYPE_SAFE_RCU(struct css_set) {
6266 struct cgroup *dfl_cgrp;
6269 /* RCU trusted: these fields are trusted in RCU CS and can be NULL */
6270 BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct) {
6271 struct file __rcu *exe_file;
6274 /* skb->sk, req->sk are not RCU protected, but we mark them as such
6275 * because bpf prog accessible sockets are SOCK_RCU_FREE.
6277 BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff) {
6281 BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock) {
6285 /* full trusted: these fields are trusted even outside of RCU CS and never NULL */
6286 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta) {
6287 struct seq_file *seq;
6290 BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task) {
6291 struct bpf_iter_meta *meta;
6292 struct task_struct *task;
6295 BTF_TYPE_SAFE_TRUSTED(struct linux_binprm) {
6299 BTF_TYPE_SAFE_TRUSTED(struct file) {
6300 struct inode *f_inode;
6303 BTF_TYPE_SAFE_TRUSTED(struct dentry) {
6304 /* no negative dentry-s in places where bpf can see it */
6305 struct inode *d_inode;
6308 BTF_TYPE_SAFE_TRUSTED(struct socket) {
6312 static bool type_is_rcu(struct bpf_verifier_env *env,
6313 struct bpf_reg_state *reg,
6314 const char *field_name, u32 btf_id)
6316 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct task_struct));
6317 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct cgroup));
6318 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU(struct css_set));
6320 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu");
6323 static bool type_is_rcu_or_null(struct bpf_verifier_env *env,
6324 struct bpf_reg_state *reg,
6325 const char *field_name, u32 btf_id)
6327 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct mm_struct));
6328 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct sk_buff));
6329 BTF_TYPE_EMIT(BTF_TYPE_SAFE_RCU_OR_NULL(struct request_sock));
6331 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_rcu_or_null");
6334 static bool type_is_trusted(struct bpf_verifier_env *env,
6335 struct bpf_reg_state *reg,
6336 const char *field_name, u32 btf_id)
6338 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter_meta));
6339 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct bpf_iter__task));
6340 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct linux_binprm));
6341 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct file));
6342 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct dentry));
6343 BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED(struct socket));
6345 return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted");
6348 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
6349 struct bpf_reg_state *regs,
6350 int regno, int off, int size,
6351 enum bpf_access_type atype,
6354 struct bpf_reg_state *reg = regs + regno;
6355 const struct btf_type *t = btf_type_by_id(reg->btf, reg->btf_id);
6356 const char *tname = btf_name_by_offset(reg->btf, t->name_off);
6357 const char *field_name = NULL;
6358 enum bpf_type_flag flag = 0;
6362 if (!env->allow_ptr_leaks) {
6364 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6368 if (!env->prog->gpl_compatible && btf_is_kernel(reg->btf)) {
6370 "Cannot access kernel 'struct %s' from non-GPL compatible program\n",
6376 "R%d is ptr_%s invalid negative access: off=%d\n",
6380 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
6383 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6385 "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
6386 regno, tname, off, tn_buf);
6390 if (reg->type & MEM_USER) {
6392 "R%d is ptr_%s access user memory: off=%d\n",
6397 if (reg->type & MEM_PERCPU) {
6399 "R%d is ptr_%s access percpu memory: off=%d\n",
6404 if (env->ops->btf_struct_access && !type_is_alloc(reg->type) && atype == BPF_WRITE) {
6405 if (!btf_is_kernel(reg->btf)) {
6406 verbose(env, "verifier internal error: reg->btf must be kernel btf\n");
6409 ret = env->ops->btf_struct_access(&env->log, reg, off, size);
6411 /* Writes are permitted with default btf_struct_access for
6412 * program allocated objects (which always have ref_obj_id > 0),
6413 * but not for untrusted PTR_TO_BTF_ID | MEM_ALLOC.
6415 if (atype != BPF_READ && !type_is_ptr_alloc_obj(reg->type)) {
6416 verbose(env, "only read is supported\n");
6420 if (type_is_alloc(reg->type) && !type_is_non_owning_ref(reg->type) &&
6422 verbose(env, "verifier internal error: ref_obj_id for allocated object must be non-zero\n");
6426 ret = btf_struct_access(&env->log, reg, off, size, atype, &btf_id, &flag, &field_name);
6432 if (ret != PTR_TO_BTF_ID) {
6435 } else if (type_flag(reg->type) & PTR_UNTRUSTED) {
6436 /* If this is an untrusted pointer, all pointers formed by walking it
6437 * also inherit the untrusted flag.
6439 flag = PTR_UNTRUSTED;
6441 } else if (is_trusted_reg(reg) || is_rcu_reg(reg)) {
6442 /* By default any pointer obtained from walking a trusted pointer is no
6443 * longer trusted, unless the field being accessed has explicitly been
6444 * marked as inheriting its parent's state of trust (either full or RCU).
6446 * 'cgroups' pointer is untrusted if task->cgroups dereference
6447 * happened in a sleepable program outside of bpf_rcu_read_lock()
6448 * section. In a non-sleepable program it's trusted while in RCU CS (aka MEM_RCU).
6449 * Note bpf_rcu_read_unlock() converts MEM_RCU pointers to PTR_UNTRUSTED.
6451 * A regular RCU-protected pointer with __rcu tag can also be deemed
6452 * trusted if we are in an RCU CS. Such pointer can be NULL.
6454 if (type_is_trusted(env, reg, field_name, btf_id)) {
6455 flag |= PTR_TRUSTED;
6456 } else if (in_rcu_cs(env) && !type_may_be_null(reg->type)) {
6457 if (type_is_rcu(env, reg, field_name, btf_id)) {
6458 /* ignore __rcu tag and mark it MEM_RCU */
6460 } else if (flag & MEM_RCU ||
6461 type_is_rcu_or_null(env, reg, field_name, btf_id)) {
6462 /* __rcu tagged pointers can be NULL */
6463 flag |= MEM_RCU | PTR_MAYBE_NULL;
6465 /* We always trust them */
6466 if (type_is_rcu_or_null(env, reg, field_name, btf_id) &&
6467 flag & PTR_UNTRUSTED)
6468 flag &= ~PTR_UNTRUSTED;
6469 } else if (flag & (MEM_PERCPU | MEM_USER)) {
6472 /* walking unknown pointers yields old deprecated PTR_TO_BTF_ID */
6473 clear_trusted_flags(&flag);
6477 * If we are not in an RCU CS or the MEM_RCU pointer can be NULL, then
6478 * aggressively mark it as untrusted, otherwise such
6479 * pointers will be plain PTR_TO_BTF_ID without flags
6480 * and will be allowed to be passed into helpers for
6483 flag = PTR_UNTRUSTED;
6486 /* Old compat. Deprecated */
6487 clear_trusted_flags(&flag);
6490 if (atype == BPF_READ && value_regno >= 0)
6491 mark_btf_ld_reg(env, regs, value_regno, ret, reg->btf, btf_id, flag);
6496 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
6497 struct bpf_reg_state *regs,
6498 int regno, int off, int size,
6499 enum bpf_access_type atype,
6502 struct bpf_reg_state *reg = regs + regno;
6503 struct bpf_map *map = reg->map_ptr;
6504 struct bpf_reg_state map_reg;
6505 enum bpf_type_flag flag = 0;
6506 const struct btf_type *t;
6512 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
6516 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
6517 verbose(env, "map_ptr access not supported for map type %d\n",
6522 t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
6523 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
6525 if (!env->allow_ptr_leaks) {
6527 "'struct %s' access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
6533 verbose(env, "R%d is %s invalid negative access: off=%d\n",
6538 if (atype != BPF_READ) {
6539 verbose(env, "only read from %s is supported\n", tname);
6543 /* Simulate access to a PTR_TO_BTF_ID */
6544 memset(&map_reg, 0, sizeof(map_reg));
6545 mark_btf_ld_reg(env, &map_reg, 0, PTR_TO_BTF_ID, btf_vmlinux, *map->ops->map_btf_id, 0);
6546 ret = btf_struct_access(&env->log, &map_reg, off, size, atype, &btf_id, &flag, NULL);
6550 if (value_regno >= 0)
6551 mark_btf_ld_reg(env, regs, value_regno, ret, btf_vmlinux, btf_id, flag);
6556 /* Check that the stack access at the given offset is within bounds. The
6557 * maximum valid offset is -1.
6559 * The minimum valid offset is -MAX_BPF_STACK for writes, and
6560 * -state->allocated_stack for reads.
6562 static int check_stack_slot_within_bounds(struct bpf_verifier_env *env,
6564 struct bpf_func_state *state,
6565 enum bpf_access_type t)
6569 if (t == BPF_WRITE || env->allow_uninit_stack)
6570 min_valid_off = -MAX_BPF_STACK;
6572 min_valid_off = -state->allocated_stack;
6574 if (off < min_valid_off || off > -1)
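/* For example, with a 512 byte MAX_BPF_STACK, a direct write at off = -8
 * passes the check above, while off = 0 (or any non-negative offset) and
 * off = -(MAX_BPF_STACK + 8) are rejected.
 */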
6579 /* Check that the stack access at 'regno + off' falls within the maximum stack
6582 * 'off' includes 'regno->off', but not its dynamic part (if any).
6584 static int check_stack_access_within_bounds(
6585 struct bpf_verifier_env *env,
6586 int regno, int off, int access_size,
6587 enum bpf_access_src src, enum bpf_access_type type)
6589 struct bpf_reg_state *regs = cur_regs(env);
6590 struct bpf_reg_state *reg = regs + regno;
6591 struct bpf_func_state *state = func(env, reg);
6592 s64 min_off, max_off;
6596 if (src == ACCESS_HELPER)
6597 /* We don't know if helpers are reading or writing (or both). */
6598 err_extra = " indirect access to";
6599 else if (type == BPF_READ)
6600 err_extra = " read from";
6602 err_extra = " write to";
6604 if (tnum_is_const(reg->var_off)) {
6605 min_off = (s64)reg->var_off.value + off;
6606 max_off = min_off + access_size;
6608 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
6609 reg->smin_value <= -BPF_MAX_VAR_OFF) {
6610 verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
6614 min_off = reg->smin_value + off;
6615 max_off = reg->smax_value + off + access_size;
6618 err = check_stack_slot_within_bounds(env, min_off, state, type);
6619 if (!err && max_off > 0)
6620 err = -EINVAL; /* out of stack access into non-negative offsets */
6623 if (tnum_is_const(reg->var_off)) {
6624 verbose(env, "invalid%s stack R%d off=%d size=%d\n",
6625 err_extra, regno, off, access_size);
6629 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
6630 verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
6631 err_extra, regno, tn_buf, access_size);
6636 return grow_stack_state(env, state, round_up(-min_off, BPF_REG_SIZE));
6639 /* check whether memory at (regno + off) is accessible for t = (read | write)
6640 * if t==write, value_regno is a register whose value is stored into memory
6641 * if t==read, value_regno is a register which will receive the value from memory
6642 * if t==write && value_regno==-1, some unknown value is stored into memory
6643 * if t==read && value_regno==-1, don't care what we read from memory
6645 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
6646 int off, int bpf_size, enum bpf_access_type t,
6647 int value_regno, bool strict_alignment_once, bool is_ldsx)
6649 struct bpf_reg_state *regs = cur_regs(env);
6650 struct bpf_reg_state *reg = regs + regno;
6653 size = bpf_size_to_bytes(bpf_size);
6657 /* alignment checks will add in reg->off themselves */
6658 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
6662 /* for access checks, reg->off is just part of off */
6665 if (reg->type == PTR_TO_MAP_KEY) {
6666 if (t == BPF_WRITE) {
6667 verbose(env, "write to change key R%d not allowed\n", regno);
6671 err = check_mem_region_access(env, regno, off, size,
6672 reg->map_ptr->key_size, false);
6675 if (value_regno >= 0)
6676 mark_reg_unknown(env, regs, value_regno);
6677 } else if (reg->type == PTR_TO_MAP_VALUE) {
6678 struct btf_field *kptr_field = NULL;
6680 if (t == BPF_WRITE && value_regno >= 0 &&
6681 is_pointer_value(env, value_regno)) {
6682 verbose(env, "R%d leaks addr into map\n", value_regno);
6685 err = check_map_access_type(env, regno, off, size, t);
6688 err = check_map_access(env, regno, off, size, false, ACCESS_DIRECT);
6691 if (tnum_is_const(reg->var_off))
6692 kptr_field = btf_record_find(reg->map_ptr->record,
6693 off + reg->var_off.value, BPF_KPTR);
6695 err = check_map_kptr_access(env, regno, value_regno, insn_idx, kptr_field);
6696 } else if (t == BPF_READ && value_regno >= 0) {
6697 struct bpf_map *map = reg->map_ptr;
6699 /* if map is read-only, track its contents as scalars */
6700 if (tnum_is_const(reg->var_off) &&
6701 bpf_map_is_rdonly(map) &&
6702 map->ops->map_direct_value_addr) {
6703 int map_off = off + reg->var_off.value;
6706 err = bpf_map_direct_read(map, map_off, size,
6711 regs[value_regno].type = SCALAR_VALUE;
6712 __mark_reg_known(&regs[value_regno], val);
6714 mark_reg_unknown(env, regs, value_regno);
6717 } else if (base_type(reg->type) == PTR_TO_MEM) {
6718 bool rdonly_mem = type_is_rdonly_mem(reg->type);
6720 if (type_may_be_null(reg->type)) {
6721 verbose(env, "R%d invalid mem access '%s'\n", regno,
6722 reg_type_str(env, reg->type));
6726 if (t == BPF_WRITE && rdonly_mem) {
6727 verbose(env, "R%d cannot write into %s\n",
6728 regno, reg_type_str(env, reg->type));
6732 if (t == BPF_WRITE && value_regno >= 0 &&
6733 is_pointer_value(env, value_regno)) {
6734 verbose(env, "R%d leaks addr into mem\n", value_regno);
6738 err = check_mem_region_access(env, regno, off, size,
6739 reg->mem_size, false);
6740 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem))
6741 mark_reg_unknown(env, regs, value_regno);
6742 } else if (reg->type == PTR_TO_CTX) {
6743 enum bpf_reg_type reg_type = SCALAR_VALUE;
6744 struct btf *btf = NULL;
6747 if (t == BPF_WRITE && value_regno >= 0 &&
6748 is_pointer_value(env, value_regno)) {
6749 verbose(env, "R%d leaks addr into ctx\n", value_regno);
6753 err = check_ptr_off_reg(env, reg, regno);
6757 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
6760 verbose_linfo(env, insn_idx, "; ");
6761 if (!err && t == BPF_READ && value_regno >= 0) {
6762 /* ctx access returns either a scalar, or a
6763 * PTR_TO_PACKET[_META,_END]. In the latter
6764 * case, we know the offset is zero.
6766 if (reg_type == SCALAR_VALUE) {
6767 mark_reg_unknown(env, regs, value_regno);
6769 mark_reg_known_zero(env, regs,
6771 if (type_may_be_null(reg_type))
6772 regs[value_regno].id = ++env->id_gen;
6773 /* A load of a ctx field could have a different
6774 * actual load size from the one encoded in the
6775 * insn. When the dst is PTR, it is for sure not
6778 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
6779 if (base_type(reg_type) == PTR_TO_BTF_ID) {
6780 regs[value_regno].btf = btf;
6781 regs[value_regno].btf_id = btf_id;
6784 regs[value_regno].type = reg_type;
6787 } else if (reg->type == PTR_TO_STACK) {
6788 /* Basic bounds checks. */
6789 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
6794 err = check_stack_read(env, regno, off, size,
6797 err = check_stack_write(env, regno, off, size,
6798 value_regno, insn_idx);
6799 } else if (reg_is_pkt_pointer(reg)) {
6800 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
6801 verbose(env, "cannot write into packet\n");
6804 if (t == BPF_WRITE && value_regno >= 0 &&
6805 is_pointer_value(env, value_regno)) {
6806 verbose(env, "R%d leaks addr into packet\n",
6810 err = check_packet_access(env, regno, off, size, false);
6811 if (!err && t == BPF_READ && value_regno >= 0)
6812 mark_reg_unknown(env, regs, value_regno);
6813 } else if (reg->type == PTR_TO_FLOW_KEYS) {
6814 if (t == BPF_WRITE && value_regno >= 0 &&
6815 is_pointer_value(env, value_regno)) {
6816 verbose(env, "R%d leaks addr into flow keys\n",
6821 err = check_flow_keys_access(env, off, size);
6822 if (!err && t == BPF_READ && value_regno >= 0)
6823 mark_reg_unknown(env, regs, value_regno);
6824 } else if (type_is_sk_pointer(reg->type)) {
6825 if (t == BPF_WRITE) {
6826 verbose(env, "R%d cannot write into %s\n",
6827 regno, reg_type_str(env, reg->type));
6830 err = check_sock_access(env, insn_idx, regno, off, size, t);
6831 if (!err && value_regno >= 0)
6832 mark_reg_unknown(env, regs, value_regno);
6833 } else if (reg->type == PTR_TO_TP_BUFFER) {
6834 err = check_tp_buffer_access(env, reg, regno, off, size);
6835 if (!err && t == BPF_READ && value_regno >= 0)
6836 mark_reg_unknown(env, regs, value_regno);
6837 } else if (base_type(reg->type) == PTR_TO_BTF_ID &&
6838 !type_may_be_null(reg->type)) {
6839 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
6841 } else if (reg->type == CONST_PTR_TO_MAP) {
6842 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
6844 } else if (base_type(reg->type) == PTR_TO_BUF) {
6845 bool rdonly_mem = type_is_rdonly_mem(reg->type);
6849 if (t == BPF_WRITE) {
6850 verbose(env, "R%d cannot write into %s\n",
6851 regno, reg_type_str(env, reg->type));
6854 max_access = &env->prog->aux->max_rdonly_access;
6856 max_access = &env->prog->aux->max_rdwr_access;
6859 err = check_buffer_access(env, reg, regno, off, size, false,
6862 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
6863 mark_reg_unknown(env, regs, value_regno);
6865 verbose(env, "R%d invalid mem access '%s'\n", regno,
6866 reg_type_str(env, reg->type));
6870 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
6871 regs[value_regno].type == SCALAR_VALUE) {
6873 /* b/h/w load zero-extends, mark upper bits as known 0 */
6874 coerce_reg_to_size(&regs[value_regno], size);
6876 coerce_reg_to_size_sx(&regs[value_regno], size);
6881 static int check_atomic(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
6886 switch (insn->imm) {
6888 case BPF_ADD | BPF_FETCH:
6890 case BPF_AND | BPF_FETCH:
6892 case BPF_OR | BPF_FETCH:
6894 case BPF_XOR | BPF_FETCH:
6899 verbose(env, "BPF_ATOMIC uses invalid atomic opcode %02x\n", insn->imm);
6903 if (BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) {
6904 verbose(env, "invalid atomic operand size\n");
6908 /* check src1 operand */
6909 err = check_reg_arg(env, insn->src_reg, SRC_OP);
6913 /* check src2 operand */
6914 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6918 if (insn->imm == BPF_CMPXCHG) {
6919 /* Check comparison of R0 with memory location */
6920 const u32 aux_reg = BPF_REG_0;
6922 err = check_reg_arg(env, aux_reg, SRC_OP);
6926 if (is_pointer_value(env, aux_reg)) {
6927 verbose(env, "R%d leaks addr into mem\n", aux_reg);
6932 if (is_pointer_value(env, insn->src_reg)) {
6933 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
6937 if (is_ctx_reg(env, insn->dst_reg) ||
6938 is_pkt_reg(env, insn->dst_reg) ||
6939 is_flow_key_reg(env, insn->dst_reg) ||
6940 is_sk_reg(env, insn->dst_reg)) {
6941 verbose(env, "BPF_ATOMIC stores into R%d %s is not allowed\n",
6943 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
6947 if (insn->imm & BPF_FETCH) {
6948 if (insn->imm == BPF_CMPXCHG)
6949 load_reg = BPF_REG_0;
6951 load_reg = insn->src_reg;
6953 /* check and record load of old value */
6954 err = check_reg_arg(env, load_reg, DST_OP);
6958 /* This instruction accesses a memory location but doesn't
6959 * actually load it into a register.
6964 /* Check whether we can read the memory, with second call for fetch
6965 * case to simulate the register fill.
6967 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
6968 BPF_SIZE(insn->code), BPF_READ, -1, true, false);
6969 if (!err && load_reg >= 0)
6970 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
6971 BPF_SIZE(insn->code), BPF_READ, load_reg,
6976 /* Check whether we can write into the same memory. */
6977 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
6978 BPF_SIZE(insn->code), BPF_WRITE, -1, true, false);
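/* Illustrative instruction (not emitted here) that satisfies the checks
 * above, assuming R1 holds a PTR_TO_MAP_VALUE and R2 a scalar:
 *
 *	BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_1, BPF_REG_2, 0)
 *
 * R2 supplies the addend and, because BPF_FETCH is set, also receives the
 * old value, so it is checked both as SRC_OP and as DST_OP. The same
 * operation with dst_reg pointing to ctx, packet, flow keys or a socket
 * is rejected above.
 */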
6985 /* When register 'regno' is used to read the stack (either directly or through
6986 * a helper function) make sure that it's within stack boundary and, depending
6987 * on the access type and privileges, that all elements of the stack are
6990 * 'off' includes 'regno->off', but not its dynamic part (if any).
6992 * All registers that have been spilled on the stack in the slots within the
6993 * read offsets are marked as read.
6995 static int check_stack_range_initialized(
6996 struct bpf_verifier_env *env, int regno, int off,
6997 int access_size, bool zero_size_allowed,
6998 enum bpf_access_src type, struct bpf_call_arg_meta *meta)
7000 struct bpf_reg_state *reg = reg_state(env, regno);
7001 struct bpf_func_state *state = func(env, reg);
7002 int err, min_off, max_off, i, j, slot, spi;
7003 char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
7004 enum bpf_access_type bounds_check_type;
7005 /* Some accesses can write anything into the stack, others are
7008 bool clobber = false;
7010 if (access_size == 0 && !zero_size_allowed) {
7011 verbose(env, "invalid zero-sized read\n");
7015 if (type == ACCESS_HELPER) {
7016 /* The bounds checks for writes are more permissive than for
7017 * reads. However, if raw_mode is not set, we'll do extra
7020 bounds_check_type = BPF_WRITE;
7023 bounds_check_type = BPF_READ;
7025 err = check_stack_access_within_bounds(env, regno, off, access_size,
7026 type, bounds_check_type);
7031 if (tnum_is_const(reg->var_off)) {
7032 min_off = max_off = reg->var_off.value + off;
7034 /* Variable offset is prohibited for unprivileged mode for
7035 * simplicity since it requires corresponding support in
7036 * Spectre masking for stack ALU.
7037 * See also retrieve_ptr_limit().
7039 if (!env->bypass_spec_v1) {
7042 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7043 verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
7044 regno, err_extra, tn_buf);
7047 /* Only an initialized buffer on the stack is allowed to be accessed
7048 * with a variable offset. With an uninitialized buffer it's hard to
7049 * guarantee that the whole memory is marked as initialized on
7050 * helper return, since the specific bounds are unknown, which may
7051 * cause uninitialized stack leaking.
7053 if (meta && meta->raw_mode)
7056 min_off = reg->smin_value + off;
7057 max_off = reg->smax_value + off;
7060 if (meta && meta->raw_mode) {
7061 /* Ensure we won't be overwriting dynptrs when simulating byte
7062 * by byte access in check_helper_call using meta.access_size.
7063 * This would be a problem if we have a helper in the future
7066 * helper(uninit_mem, len, dynptr)
7068 * Now, uninit_mem may overlap with the dynptr pointer. Hence, it
7069 * may end up writing to dynptr itself when touching memory from
7070 * arg 1. This can be relaxed on a case by case basis for known
7071 * safe cases, but reject due to the possibility of aliasing by
7074 for (i = min_off; i < max_off + access_size; i++) {
7075 int stack_off = -i - 1;
7078 /* raw_mode may write past allocated_stack */
7079 if (state->allocated_stack <= stack_off)
7081 if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
7082 verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
7086 meta->access_size = access_size;
7087 meta->regno = regno;
7091 for (i = min_off; i < max_off + access_size; i++) {
7095 spi = slot / BPF_REG_SIZE;
7096 if (state->allocated_stack <= slot) {
7097 verbose(env, "verifier bug: allocated_stack too small");
7101 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
7102 if (*stype == STACK_MISC)
7104 if ((*stype == STACK_ZERO) ||
7105 (*stype == STACK_INVALID && env->allow_uninit_stack)) {
7107 /* helper can write anything into the stack */
7108 *stype = STACK_MISC;
7113 if (is_spilled_reg(&state->stack[spi]) &&
7114 (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
7115 env->allow_ptr_leaks)) {
7117 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
7118 for (j = 0; j < BPF_REG_SIZE; j++)
7119 scrub_spilled_slot(&state->stack[spi].slot_type[j]);
7124 if (tnum_is_const(reg->var_off)) {
7125 verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
7126 err_extra, regno, min_off, i - min_off, access_size);
7130 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
7131 verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
7132 err_extra, regno, tn_buf, i - min_off, access_size);
7136 /* reading any byte out of 8-byte 'spill_slot' will cause
7137 * the whole slot to be marked as 'read'
7139 mark_reg_read(env, &state->stack[spi].spilled_ptr,
7140 state->stack[spi].spilled_ptr.parent,
7142 /* We do not set REG_LIVE_WRITTEN for the stack slot, as we cannot
7143 * be sure whether the stack slot is written to or not. Hence,
7144 * we must still conservatively propagate reads upwards even if the
7145 * helper may write to the entire memory range.
7151 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
7152 int access_size, bool zero_size_allowed,
7153 struct bpf_call_arg_meta *meta)
7155 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7158 switch (base_type(reg->type)) {
7160 case PTR_TO_PACKET_META:
7161 return check_packet_access(env, regno, reg->off, access_size,
7163 case PTR_TO_MAP_KEY:
7164 if (meta && meta->raw_mode) {
7165 verbose(env, "R%d cannot write into %s\n", regno,
7166 reg_type_str(env, reg->type));
7169 return check_mem_region_access(env, regno, reg->off, access_size,
7170 reg->map_ptr->key_size, false);
7171 case PTR_TO_MAP_VALUE:
7172 if (check_map_access_type(env, regno, reg->off, access_size,
7173 meta && meta->raw_mode ? BPF_WRITE :
7176 return check_map_access(env, regno, reg->off, access_size,
7177 zero_size_allowed, ACCESS_HELPER);
7179 if (type_is_rdonly_mem(reg->type)) {
7180 if (meta && meta->raw_mode) {
7181 verbose(env, "R%d cannot write into %s\n", regno,
7182 reg_type_str(env, reg->type));
7186 return check_mem_region_access(env, regno, reg->off,
7187 access_size, reg->mem_size,
7190 if (type_is_rdonly_mem(reg->type)) {
7191 if (meta && meta->raw_mode) {
7192 verbose(env, "R%d cannot write into %s\n", regno,
7193 reg_type_str(env, reg->type));
7197 max_access = &env->prog->aux->max_rdonly_access;
7199 max_access = &env->prog->aux->max_rdwr_access;
7201 return check_buffer_access(env, reg, regno, reg->off,
7202 access_size, zero_size_allowed,
7205 return check_stack_range_initialized(
7207 regno, reg->off, access_size,
7208 zero_size_allowed, ACCESS_HELPER, meta);
7210 return check_ptr_to_btf_access(env, regs, regno, reg->off,
7211 access_size, BPF_READ, -1);
7213 /* in case the function doesn't know how to access the context
7214 * (because we are in a program of type SYSCALL, for example), we
7215 * cannot statically check its size.
7216 * Dynamically check it now.
7218 if (!env->ops->convert_ctx_access) {
7219 enum bpf_access_type atype = meta && meta->raw_mode ? BPF_WRITE : BPF_READ;
7220 int offset = access_size - 1;
7222 /* Allow zero-byte read from PTR_TO_CTX */
7223 if (access_size == 0)
7224 return zero_size_allowed ? 0 : -EACCES;
7226 return check_mem_access(env, env->insn_idx, regno, offset, BPF_B,
7227 atype, -1, false, false);
7231 default: /* scalar_value or invalid ptr */
7232 /* Allow zero-byte read from NULL, regardless of pointer type */
7233 if (zero_size_allowed && access_size == 0 &&
7234 register_is_null(reg))
7237 verbose(env, "R%d type=%s ", regno,
7238 reg_type_str(env, reg->type));
7239 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
7244 static int check_mem_size_reg(struct bpf_verifier_env *env,
7245 struct bpf_reg_state *reg, u32 regno,
7246 bool zero_size_allowed,
7247 struct bpf_call_arg_meta *meta)
7251 /* This is used to refine r0 return value bounds for helpers
7252 * that enforce this value as an upper bound on return values.
7253 * See do_refine_retval_range() for helpers that can refine
7254 * the return value. The C type of the helper's size argument is u32, so we
7255 * pull the register bound from umax_value; however, if it is negative the
7256 * verifier errors out. Only upper bounds can be learned because retval is an
7257 * int type and negative retvals are allowed.
7259 meta->msize_max_value = reg->umax_value;
7261 /* The register is SCALAR_VALUE; the access check
7262 * happens using its boundaries.
7264 if (!tnum_is_const(reg->var_off))
7265 /* For unprivileged variable accesses, disable raw
7266 * mode so that the program is required to
7267 * initialize all the memory that the helper could
7268 * just partially fill up.
7272 if (reg->smin_value < 0) {
7273 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
7278 if (reg->umin_value == 0) {
7279 err = check_helper_mem_access(env, regno - 1, 0,
7286 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
7287 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
7291 err = check_helper_mem_access(env, regno - 1,
7293 zero_size_allowed, meta);
7295 err = mark_chain_precision(env, regno);
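/* Illustrative BPF C sketch (get_len(), buf and src are hypothetical) of
 * bounding a variable size before it is used as a mem size argument, which
 * keeps umax_value below BPF_MAX_VAR_SIZ and avoids the errors above:
 *
 *	__u32 len = get_len();		// hypothetical scalar of unknown value
 *	len &= 0xff;			// or: if (len > 0xff) return 0;
 *	bpf_probe_read_kernel(buf, len, src);
 */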
7299 int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7300 u32 regno, u32 mem_size)
7302 bool may_be_null = type_may_be_null(reg->type);
7303 struct bpf_reg_state saved_reg;
7304 struct bpf_call_arg_meta meta;
7307 if (register_is_null(reg))
7310 memset(&meta, 0, sizeof(meta));
7311 /* Assuming that the register contains a value, check if the memory
7312 * access is safe. Temporarily save and restore the register's state as
7313 * the conversion shouldn't be visible to a caller.
7317 mark_ptr_not_null_reg(reg);
7320 err = check_helper_mem_access(env, regno, mem_size, true, &meta);
7321 /* Check access for BPF_WRITE */
7322 meta.raw_mode = true;
7323 err = err ?: check_helper_mem_access(env, regno, mem_size, true, &meta);
7331 static int check_kfunc_mem_size_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
7334 struct bpf_reg_state *mem_reg = &cur_regs(env)[regno - 1];
7335 bool may_be_null = type_may_be_null(mem_reg->type);
7336 struct bpf_reg_state saved_reg;
7337 struct bpf_call_arg_meta meta;
7340 WARN_ON_ONCE(regno < BPF_REG_2 || regno > BPF_REG_5);
7342 memset(&meta, 0, sizeof(meta));
7345 saved_reg = *mem_reg;
7346 mark_ptr_not_null_reg(mem_reg);
7349 err = check_mem_size_reg(env, reg, regno, true, &meta);
7350 /* Check access for BPF_WRITE */
7351 meta.raw_mode = true;
7352 err = err ?: check_mem_size_reg(env, reg, regno, true, &meta);
7355 *mem_reg = saved_reg;
7359 /* Implementation details:
7360 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL.
7361 * bpf_obj_new returns PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL.
7362 * Two bpf_map_lookups (even with the same key) will have different reg->id.
7363 * Two separate bpf_obj_new will also have different reg->id.
7364 * For traditional PTR_TO_MAP_VALUE or PTR_TO_BTF_ID | MEM_ALLOC, the verifier
7365 * clears reg->id after value_or_null->value transition, since the verifier only
7366 * cares about the range of access to valid map value pointer and doesn't care
7367 * about actual address of the map element.
7368 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
7369 * reg->id > 0 after value_or_null->value transition. By doing so
7370 * two bpf_map_lookups will be considered two different pointers that
7371 * point to different bpf_spin_locks. Likewise for pointers to allocated objects
7372 * returned from bpf_obj_new.
7373 * The verifier allows taking only one bpf_spin_lock at a time to avoid
7375 * Since only one bpf_spin_lock is allowed the checks are simpler than
7376 * reg_is_refcounted() logic. The verifier needs to remember only
7377 * one spin_lock instead of array of acquired_refs.
7378 * cur_state->active_lock remembers which map value element or allocated
7379 * object got locked and clears it after bpf_spin_unlock.
7381 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
7384 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7385 struct bpf_verifier_state *cur = env->cur_state;
7386 bool is_const = tnum_is_const(reg->var_off);
7387 u64 val = reg->var_off.value;
7388 struct bpf_map *map = NULL;
7389 struct btf *btf = NULL;
7390 struct btf_record *rec;
7394 "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
7398 if (reg->type == PTR_TO_MAP_VALUE) {
7402 "map '%s' has to have BTF in order to use bpf_spin_lock\n",
7410 rec = reg_btf_record(reg);
7411 if (!btf_record_has_field(rec, BPF_SPIN_LOCK)) {
7412 verbose(env, "%s '%s' has no valid bpf_spin_lock\n", map ? "map" : "local",
7413 map ? map->name : "kptr");
7416 if (rec->spin_lock_off != val + reg->off) {
7417 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock' that is at %d\n",
7418 val + reg->off, rec->spin_lock_off);
7422 if (cur->active_lock.ptr) {
7424 "Locking two bpf_spin_locks are not allowed\n");
7428 cur->active_lock.ptr = map;
7430 cur->active_lock.ptr = btf;
7431 cur->active_lock.id = reg->id;
7440 if (!cur->active_lock.ptr) {
7441 verbose(env, "bpf_spin_unlock without taking a lock\n");
7444 if (cur->active_lock.ptr != ptr ||
7445 cur->active_lock.id != reg->id) {
7446 verbose(env, "bpf_spin_unlock of different lock\n");
7450 invalidate_non_owning_refs(env);
7452 cur->active_lock.ptr = NULL;
7453 cur->active_lock.id = 0;
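/* Illustrative BPF C sketch of the locking pattern enforced above
 * (struct, map and field names are only for this example):
 *
 *	struct elem {
 *		struct bpf_spin_lock lock;
 *		int data;
 *	};
 *
 *	struct elem *e = bpf_map_lookup_elem(&my_map, &key);	// my_map/key: placeholders
 *	if (!e)
 *		return 0;
 *	bpf_spin_lock(&e->lock);
 *	e->data++;
 *	bpf_spin_unlock(&e->lock);
 *
 * Taking a second bpf_spin_lock before the unlock, or unlocking a lock of
 * a different map element, is rejected by the checks above.
 */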
7458 static int process_timer_func(struct bpf_verifier_env *env, int regno,
7459 struct bpf_call_arg_meta *meta)
7461 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7462 bool is_const = tnum_is_const(reg->var_off);
7463 struct bpf_map *map = reg->map_ptr;
7464 u64 val = reg->var_off.value;
7468 "R%d doesn't have constant offset. bpf_timer has to be at the constant offset\n",
7473 verbose(env, "map '%s' has to have BTF in order to use bpf_timer\n",
7477 if (!btf_record_has_field(map->record, BPF_TIMER)) {
7478 verbose(env, "map '%s' has no valid bpf_timer\n", map->name);
7481 if (map->record->timer_off != val + reg->off) {
7482 verbose(env, "off %lld doesn't point to 'struct bpf_timer' that is at %d\n",
7483 val + reg->off, map->record->timer_off);
7486 if (meta->map_ptr) {
7487 verbose(env, "verifier bug. Two map pointers in a timer helper\n");
7490 meta->map_uid = reg->map_uid;
7491 meta->map_ptr = map;
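/* Illustrative BPF C sketch of a map value embedding the bpf_timer that
 * process_timer_func() validates (timer_map, key and timer_cb are
 * placeholder names for this example):
 *
 *	struct elem {
 *		struct bpf_timer t;
 *	};
 *
 *	struct elem *e = bpf_map_lookup_elem(&timer_map, &key);
 *	if (!e)
 *		return 0;
 *	bpf_timer_init(&e->t, &timer_map, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&e->t, timer_cb);	// timer_cb: placeholder
 *	bpf_timer_start(&e->t, 0, 0);
 */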
7495 static int process_kptr_func(struct bpf_verifier_env *env, int regno,
7496 struct bpf_call_arg_meta *meta)
7498 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7499 struct bpf_map *map_ptr = reg->map_ptr;
7500 struct btf_field *kptr_field;
7503 if (!tnum_is_const(reg->var_off)) {
7505 "R%d doesn't have constant offset. kptr has to be at the constant offset\n",
7509 if (!map_ptr->btf) {
7510 verbose(env, "map '%s' has to have BTF in order to use bpf_kptr_xchg\n",
7514 if (!btf_record_has_field(map_ptr->record, BPF_KPTR)) {
7515 verbose(env, "map '%s' has no valid kptr\n", map_ptr->name);
7519 meta->map_ptr = map_ptr;
7520 kptr_off = reg->off + reg->var_off.value;
7521 kptr_field = btf_record_find(map_ptr->record, kptr_off, BPF_KPTR);
7523 verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
7526 if (kptr_field->type != BPF_KPTR_REF) {
7527 verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
7530 meta->kptr_field = kptr_field;
7534 /* There are two register types representing a bpf_dynptr, one is PTR_TO_STACK
7535 * which points to a stack slot, and the other is CONST_PTR_TO_DYNPTR.
7537 * In both cases we deal with the first 8 bytes, but need to mark the next 8
7538 * bytes as STACK_DYNPTR in case of PTR_TO_STACK. In case of
7539 * CONST_PTR_TO_DYNPTR, we are guaranteed to get the beginning of the object.
7541 * Mutability of bpf_dynptr is at two levels, one is at the level of struct
7542 * bpf_dynptr itself, i.e. whether the helper is receiving a pointer to struct
7543 * bpf_dynptr or pointer to const struct bpf_dynptr. In the former case, it can
7544 * mutate the view of the dynptr and also possibly destroy it. In the latter
7545 * case, it cannot mutate the bpf_dynptr itself but it can still mutate the
7546 * memory that dynptr points to.
7548 * The verifier will keep track of both levels of mutation (bpf_dynptr's in
7549 * reg->type and the memory's in reg->dynptr.type), but there is no support for
7550 * readonly dynptr view yet, hence only the first case is tracked and checked.
7552 * This is consistent with how C applies the const modifier to a struct object,
7553 * where the pointer itself inside bpf_dynptr becomes const but not what it
7556 * Helpers which do not mutate the bpf_dynptr set MEM_RDONLY in their argument
7557 * type, and declare it as 'const struct bpf_dynptr *' in their prototype.
7559 static int process_dynptr_func(struct bpf_verifier_env *env, int regno, int insn_idx,
7560 enum bpf_arg_type arg_type, int clone_ref_obj_id)
7562 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7565 /* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
7566 * ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
7568 if ((arg_type & (MEM_UNINIT | MEM_RDONLY)) == (MEM_UNINIT | MEM_RDONLY)) {
7569 verbose(env, "verifier internal error: misconfigured dynptr helper type flags\n");
7573 /* MEM_UNINIT - Points to memory that is an appropriate candidate for
7574 * constructing a mutable bpf_dynptr object.
7576 * Currently, this is only possible with PTR_TO_STACK
7577 * pointing to a region of at least 16 bytes which doesn't
7578 * contain an existing bpf_dynptr.
7580 * MEM_RDONLY - Points to an initialized bpf_dynptr that will not be
7581 * mutated or destroyed. However, the memory it points to
7584 * None - Points to an initialized dynptr that can be mutated and
7585 * destroyed, including mutation of the memory it points
7588 if (arg_type & MEM_UNINIT) {
7591 if (!is_dynptr_reg_valid_uninit(env, reg)) {
7592 verbose(env, "Dynptr has to be an uninitialized dynptr\n");
7596 /* we write BPF_DW bits (8 bytes) at a time */
7597 for (i = 0; i < BPF_DYNPTR_SIZE; i += 8) {
7598 err = check_mem_access(env, insn_idx, regno,
7599 i, BPF_DW, BPF_WRITE, -1, false, false);
7604 err = mark_stack_slots_dynptr(env, reg, arg_type, insn_idx, clone_ref_obj_id);
7605 } else /* MEM_RDONLY and None case from above */ {
7606 /* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
7607 if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
7608 verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
7612 if (!is_dynptr_reg_valid_init(env, reg)) {
7614 "Expected an initialized dynptr as arg #%d\n",
7619 /* Fold modifiers (in this case, MEM_RDONLY) when checking expected type */
7620 if (!is_dynptr_type_expected(env, reg, arg_type & ~MEM_RDONLY)) {
7622 "Expected a dynptr of type %s as arg #%d\n",
7623 dynptr_type_str(arg_to_dynptr_type(arg_type)), regno);
7627 err = mark_dynptr_read(env, reg);
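/* Two helper prototypes (signatures paraphrased from the UAPI helper docs)
 * illustrating the convention described above: bpf_dynptr_read() only
 * touches the memory the dynptr points to, so it takes a const bpf_dynptr,
 * while bpf_ringbuf_reserve_dynptr() initializes the dynptr itself and
 * therefore takes a mutable one:
 *
 *	long bpf_dynptr_read(void *dst, u32 len,
 *			     const struct bpf_dynptr *src, u32 offset,
 *			     u64 flags);
 *	long bpf_ringbuf_reserve_dynptr(void *ringbuf, u32 size, u64 flags,
 *					struct bpf_dynptr *ptr);
 */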
7632 static u32 iter_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int spi)
7634 struct bpf_func_state *state = func(env, reg);
7636 return state->stack[spi].spilled_ptr.ref_obj_id;
7639 static bool is_iter_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7641 return meta->kfunc_flags & (KF_ITER_NEW | KF_ITER_NEXT | KF_ITER_DESTROY);
7644 static bool is_iter_new_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7646 return meta->kfunc_flags & KF_ITER_NEW;
7649 static bool is_iter_next_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7651 return meta->kfunc_flags & KF_ITER_NEXT;
7654 static bool is_iter_destroy_kfunc(struct bpf_kfunc_call_arg_meta *meta)
7656 return meta->kfunc_flags & KF_ITER_DESTROY;
7659 static bool is_kfunc_arg_iter(struct bpf_kfunc_call_arg_meta *meta, int arg)
7661 /* btf_check_iter_kfuncs() guarantees that first argument of any iter
7662 * kfunc is iter state pointer
7664 return arg == 0 && is_iter_kfunc(meta);
7667 static int process_iter_arg(struct bpf_verifier_env *env, int regno, int insn_idx,
7668 struct bpf_kfunc_call_arg_meta *meta)
7670 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
7671 const struct btf_type *t;
7672 const struct btf_param *arg;
7673 int spi, err, i, nr_slots;
7676 /* btf_check_iter_kfuncs() ensures we don't need to validate anything here */
7677 arg = &btf_params(meta->func_proto)[0];
7678 t = btf_type_skip_modifiers(meta->btf, arg->type, NULL); /* PTR */
7679 t = btf_type_skip_modifiers(meta->btf, t->type, &btf_id); /* STRUCT */
7680 nr_slots = t->size / BPF_REG_SIZE;
7682 if (is_iter_new_kfunc(meta)) {
7683 /* bpf_iter_<type>_new() expects pointer to uninit iter state */
7684 if (!is_iter_reg_valid_uninit(env, reg, nr_slots)) {
7685 verbose(env, "expected uninitialized iter_%s as arg #%d\n",
7686 iter_type_str(meta->btf, btf_id), regno);
7690 for (i = 0; i < nr_slots * 8; i += BPF_REG_SIZE) {
7691 err = check_mem_access(env, insn_idx, regno,
7692 i, BPF_DW, BPF_WRITE, -1, false, false);
7697 err = mark_stack_slots_iter(env, reg, insn_idx, meta->btf, btf_id, nr_slots);
7701 /* iter_next() or iter_destroy() expect initialized iter state */
7702 if (!is_iter_reg_valid_init(env, reg, meta->btf, btf_id, nr_slots)) {
7703 verbose(env, "expected an initialized iter_%s as arg #%d\n",
7704 iter_type_str(meta->btf, btf_id), regno);
7708 spi = iter_get_spi(env, reg, nr_slots);
7712 err = mark_iter_read(env, reg, spi, nr_slots);
7716 /* remember meta->iter info for process_iter_next_call() */
7717 meta->iter.spi = spi;
7718 meta->iter.frameno = reg->frameno;
7719 meta->ref_obj_id = iter_ref_obj_id(env, reg, spi);
7721 if (is_iter_destroy_kfunc(meta)) {
7722 err = unmark_stack_slots_iter(env, reg, nr_slots);
7731 /* Look for a previous loop entry at insn_idx: nearest parent state
7732 * stopped at insn_idx with callsites matching those in cur->frame.
7734 static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
7735 struct bpf_verifier_state *cur,
7738 struct bpf_verifier_state_list *sl;
7739 struct bpf_verifier_state *st;
7741 /* Explored states are pushed in stack order, most recent states come first */
7742 sl = *explored_state(env, insn_idx);
7743 for (; sl; sl = sl->next) {
7744 /* If st->branches != 0, the state is a part of the current DFS verification path,
7745 * hence cur & st form a loop.
7748 if (st->insn_idx == insn_idx && st->branches && same_callsites(st, cur) &&
7749 st->dfs_depth < cur->dfs_depth)
7756 static void reset_idmap_scratch(struct bpf_verifier_env *env);
7757 static bool regs_exact(const struct bpf_reg_state *rold,
7758 const struct bpf_reg_state *rcur,
7759 struct bpf_idmap *idmap);
7761 static void maybe_widen_reg(struct bpf_verifier_env *env,
7762 struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
7763 struct bpf_idmap *idmap)
7765 if (rold->type != SCALAR_VALUE)
7767 if (rold->type != rcur->type)
7769 if (rold->precise || rcur->precise || regs_exact(rold, rcur, idmap))
7771 __mark_reg_unknown(env, rcur);
7774 static int widen_imprecise_scalars(struct bpf_verifier_env *env,
7775 struct bpf_verifier_state *old,
7776 struct bpf_verifier_state *cur)
7778 struct bpf_func_state *fold, *fcur;
7781 reset_idmap_scratch(env);
7782 for (fr = old->curframe; fr >= 0; fr--) {
7783 fold = old->frame[fr];
7784 fcur = cur->frame[fr];
7786 for (i = 0; i < MAX_BPF_REG; i++)
7787 maybe_widen_reg(env,
7790 &env->idmap_scratch);
7792 for (i = 0; i < fold->allocated_stack / BPF_REG_SIZE; i++) {
7793 if (!is_spilled_reg(&fold->stack[i]) ||
7794 !is_spilled_reg(&fcur->stack[i]))
7797 maybe_widen_reg(env,
7798 &fold->stack[i].spilled_ptr,
7799 &fcur->stack[i].spilled_ptr,
7800 &env->idmap_scratch);
7806 /* process_iter_next_call() is called when verifier gets to iterator's next
7807 * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
7808 * to it as just "iter_next()" in comments below.
7810 * BPF verifier relies on a crucial contract for any iter_next()
7811 * implementation: it should *eventually* return NULL, and once that happens
7812 * it should keep returning NULL. That is, once iterator exhausts elements to
7813 * iterate, it should never reset or spuriously return new elements.
7815 * With the assumption of such contract, process_iter_next_call() simulates
7816 * a fork in the verifier state to validate loop logic correctness and safety
7817 * without having to simulate an infinite number of iterations.
7819 * In current state, we first assume that iter_next() returned NULL and
7820 * iterator state is set to DRAINED (BPF_ITER_STATE_DRAINED). In such
7821 * conditions we should not form an infinite loop and should eventually reach
7824 * Besides that, we also fork current state and enqueue it for later
7825 * verification. In a forked state we keep iterator state as ACTIVE
7826 * (BPF_ITER_STATE_ACTIVE) and assume non-NULL return from iter_next(). We
7827 * also bump iteration depth to prevent erroneous infinite loop detection
7828 * later on (see iter_active_depths_differ() comment for details). In this
7829 * state we assume that we'll eventually loop back to another iter_next()
7830 * call (it could be at exactly the same location or at some other instruction,
7831 * it doesn't matter, we don't make any unnecessary assumptions about this,
7832 * everything revolves around iterator state in a stack slot, not which
7833 * instruction is calling iter_next()). When that happens, we either will come
7834 * to iter_next() with equivalent state and can conclude that next iteration
7835 * will proceed in exactly the same way as we just verified, so it's safe to
7836 * assume that loop converges. If not, we'll go on another iteration
7837 * simulation with a different input state, until all possible starting states
7838 * are validated or we reach maximum number of instructions limit.
7840 * This way, we will either exhaustively discover all possible input states
7841 * that iterator loop can start with and eventually will converge, or we'll
7842 * effectively regress into bounded loop simulation logic and either reach
7843 * maximum number of instructions if loop is not provably convergent, or there
7844 * is some statically known limit on number of iterations (e.g., if there is
7845 * an explicit `if n > 100 then break;` statement somewhere in the loop).
7847 * Iteration convergence logic in is_state_visited() relies on exact
7848 * states comparison, which ignores read and precision marks.
7849 * This is necessary because read and precision marks are not finalized
7850 * while in the loop. Exact comparison might preclude convergence for
7851 * simple programs like below:
7854 * while(iter_next(&it))
7857 * At each iteration step i++ would produce a new distinct state and
7858 * eventually the instruction processing limit would be reached.
7860 * To avoid such behavior, speculatively forget (widen) the range for
7861 * imprecise scalar registers if those registers were not precise at the
7862 * end of the previous iteration and do not match exactly.
7864 * This is a conservative heuristic that allows verifying a wide range of programs;
7865 * however, it precludes verification of programs that conjure an
7866 * imprecise value on the first loop iteration and use it as precise on the second.
7867 * For example, the following safe program would fail to verify:
7869 * struct bpf_iter_num it;
7872 * bpf_iter_num_new(&it, 0, 10);
7873 * while (bpf_iter_num_next(&it)) {
7876 * i = 7; // Because i changed, the verifier would forget
7877 * // its range on second loop entry.
7879 * arr[i] = 42; // This would fail to verify.
7882 * bpf_iter_num_destroy(&it);
7884 static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
7885 struct bpf_kfunc_call_arg_meta *meta)
7887 struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
7888 struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
7889 struct bpf_reg_state *cur_iter, *queued_iter;
7890 int iter_frameno = meta->iter.frameno;
7891 int iter_spi = meta->iter.spi;
7893 BTF_TYPE_EMIT(struct bpf_iter);
7895 cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7897 if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
7898 cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
7899 verbose(env, "verifier internal error: unexpected iterator state %d (%s)\n",
7900 cur_iter->iter.state, iter_state_str(cur_iter->iter.state));
7904 if (cur_iter->iter.state == BPF_ITER_STATE_ACTIVE) {
7905 /* Because the iter_next() call is a checkpoint, is_state_visited()
7906 * should guarantee a parent state with the same call sites and insn_idx.
7908 if (!cur_st->parent || cur_st->parent->insn_idx != insn_idx ||
7909 !same_callsites(cur_st->parent, cur_st)) {
7910 verbose(env, "bug: bad parent state for iter next call");
7913 /* Note cur_st->parent in the call below, it is necessary to skip
7914 * checkpoint created for cur_st by is_state_visited()
7915 * right at this instruction.
7917 prev_st = find_prev_entry(env, cur_st->parent, insn_idx);
7918 /* branch out active iter state */
7919 queued_st = push_stack(env, insn_idx + 1, insn_idx, false);
7923 queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
7924 queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
7925 queued_iter->iter.depth++;
7927 widen_imprecise_scalars(env, prev_st, queued_st);
7929 queued_fr = queued_st->frame[queued_st->curframe];
7930 mark_ptr_not_null_reg(&queued_fr->regs[BPF_REG_0]);
7933 /* switch to DRAINED state, but keep the depth unchanged */
7934 /* mark current iter state as drained and assume returned NULL */
7935 cur_iter->iter.state = BPF_ITER_STATE_DRAINED;
7936 __mark_reg_const_zero(&cur_fr->regs[BPF_REG_0]);
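/* A sketch of a variant of the arr[i] example above that does verify:
 * re-establishing the bounds of the widened scalar inside the loop body
 * keeps the access provably in range on every iteration:
 *
 *	struct bpf_iter_num it;
 *	int arr[10], i = 0;
 *
 *	bpf_iter_num_new(&it, 0, 10);
 *	while (bpf_iter_num_next(&it)) {
 *		i = 7;
 *		if (i >= 0 && i < 10)	// bounds check keeps arr[i] in range
 *			arr[i] = 42;
 *	}
 *	bpf_iter_num_destroy(&it);
 */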
7941 static bool arg_type_is_mem_size(enum bpf_arg_type type)
7943 return type == ARG_CONST_SIZE ||
7944 type == ARG_CONST_SIZE_OR_ZERO;
7947 static bool arg_type_is_release(enum bpf_arg_type type)
7949 return type & OBJ_RELEASE;
7952 static bool arg_type_is_dynptr(enum bpf_arg_type type)
7954 return base_type(type) == ARG_PTR_TO_DYNPTR;
7957 static int int_ptr_type_to_size(enum bpf_arg_type type)
7959 if (type == ARG_PTR_TO_INT)
7961 else if (type == ARG_PTR_TO_LONG)
7967 static int resolve_map_arg_type(struct bpf_verifier_env *env,
7968 const struct bpf_call_arg_meta *meta,
7969 enum bpf_arg_type *arg_type)
7971 if (!meta->map_ptr) {
7972 /* kernel subsystem misconfigured verifier */
7973 verbose(env, "invalid map_ptr to access map->type\n");
7977 switch (meta->map_ptr->map_type) {
7978 case BPF_MAP_TYPE_SOCKMAP:
7979 case BPF_MAP_TYPE_SOCKHASH:
7980 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
7981 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
7983 verbose(env, "invalid arg_type for sockmap/sockhash\n");
7987 case BPF_MAP_TYPE_BLOOM_FILTER:
7988 if (meta->func_id == BPF_FUNC_map_peek_elem)
7989 *arg_type = ARG_PTR_TO_MAP_VALUE;
7997 struct bpf_reg_types {
7998 const enum bpf_reg_type types[10];
8002 static const struct bpf_reg_types sock_types = {
8012 static const struct bpf_reg_types btf_id_sock_common_types = {
8019 PTR_TO_BTF_ID | PTR_TRUSTED,
8021 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
8025 static const struct bpf_reg_types mem_types = {
8033 PTR_TO_MEM | MEM_RINGBUF,
8035 PTR_TO_BTF_ID | PTR_TRUSTED,
8039 static const struct bpf_reg_types int_ptr_types = {
8049 static const struct bpf_reg_types spin_lock_types = {
8052 PTR_TO_BTF_ID | MEM_ALLOC,
8056 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
8057 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
8058 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
8059 static const struct bpf_reg_types ringbuf_mem_types = { .types = { PTR_TO_MEM | MEM_RINGBUF } };
8060 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
8061 static const struct bpf_reg_types btf_ptr_types = {
8064 PTR_TO_BTF_ID | PTR_TRUSTED,
8065 PTR_TO_BTF_ID | MEM_RCU,
8068 static const struct bpf_reg_types percpu_btf_ptr_types = {
8070 PTR_TO_BTF_ID | MEM_PERCPU,
8071 PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED,
8074 static const struct bpf_reg_types func_ptr_types = { .types = { PTR_TO_FUNC } };
8075 static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK } };
8076 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
8077 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
8078 static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
8079 static const struct bpf_reg_types dynptr_types = {
8082 CONST_PTR_TO_DYNPTR,
8086 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
8087 [ARG_PTR_TO_MAP_KEY] = &mem_types,
8088 [ARG_PTR_TO_MAP_VALUE] = &mem_types,
8089 [ARG_CONST_SIZE] = &scalar_types,
8090 [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
8091 [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
8092 [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
8093 [ARG_PTR_TO_CTX] = &context_types,
8094 [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
8096 [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
8098 [ARG_PTR_TO_SOCKET] = &fullsock_types,
8099 [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
8100 [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
8101 [ARG_PTR_TO_MEM] = &mem_types,
8102 [ARG_PTR_TO_RINGBUF_MEM] = &ringbuf_mem_types,
8103 [ARG_PTR_TO_INT] = &int_ptr_types,
8104 [ARG_PTR_TO_LONG] = &int_ptr_types,
8105 [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
8106 [ARG_PTR_TO_FUNC] = &func_ptr_types,
8107 [ARG_PTR_TO_STACK] = &stack_ptr_types,
8108 [ARG_PTR_TO_CONST_STR] = &const_str_ptr_types,
8109 [ARG_PTR_TO_TIMER] = &timer_types,
8110 [ARG_PTR_TO_KPTR] = &kptr_types,
8111 [ARG_PTR_TO_DYNPTR] = &dynptr_types,
8114 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
8115 enum bpf_arg_type arg_type,
8116 const u32 *arg_btf_id,
8117 struct bpf_call_arg_meta *meta)
8119 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8120 enum bpf_reg_type expected, type = reg->type;
8121 const struct bpf_reg_types *compatible;
8124 compatible = compatible_reg_types[base_type(arg_type)];
8126 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
8130 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
8131 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
8133 * Same for MAYBE_NULL:
8135 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
8136 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
8138 * ARG_PTR_TO_MEM is compatible with PTR_TO_MEM that is tagged with a dynptr type.
8140 * Therefore we fold these flags depending on the arg_type before comparison.
8142 if (arg_type & MEM_RDONLY)
8143 type &= ~MEM_RDONLY;
8144 if (arg_type & PTR_MAYBE_NULL)
8145 type &= ~PTR_MAYBE_NULL;
8146 if (base_type(arg_type) == ARG_PTR_TO_MEM)
8147 type &= ~DYNPTR_TYPE_FLAG_MASK;
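/* Worked example (illustrative): for arg_type = ARG_PTR_TO_MEM | MEM_RDONLY,
 * a register of type PTR_TO_MEM | MEM_RDONLY has MEM_RDONLY folded away here
 * and matches the plain PTR_TO_MEM entry in mem_types. The same register
 * passed to a plain ARG_PTR_TO_MEM keeps MEM_RDONLY set and fails the match
 * below, since the helper might write through the pointer.
 */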
8149 if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type))
8152 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
8153 expected = compatible->types[i];
8154 if (expected == NOT_INIT)
8157 if (type == expected)
8161 verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
8162 for (j = 0; j + 1 < i; j++)
8163 verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
8164 verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
8168 if (base_type(reg->type) != PTR_TO_BTF_ID)
8171 if (compatible == &mem_types) {
8172 if (!(arg_type & MEM_RDONLY)) {
8174 "%s() may write into memory pointed by R%d type=%s\n",
8175 func_id_name(meta->func_id),
8176 regno, reg_type_str(env, reg->type));
8182 switch ((int)reg->type) {
8184 case PTR_TO_BTF_ID | PTR_TRUSTED:
8185 case PTR_TO_BTF_ID | MEM_RCU:
8186 case PTR_TO_BTF_ID | PTR_MAYBE_NULL:
8187 case PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU:
8189 /* For bpf_sk_release, it needs to match against first member
8190 * 'struct sock_common', hence make an exception for it. This
8191 * allows bpf_sk_release to work for multiple socket types.
8193 bool strict_type_match = arg_type_is_release(arg_type) &&
8194 meta->func_id != BPF_FUNC_sk_release;
8196 if (type_may_be_null(reg->type) &&
8197 (!type_may_be_null(arg_type) || arg_type_is_release(arg_type))) {
8198 verbose(env, "Possibly NULL pointer passed to helper arg%d\n", regno);
8203 if (!compatible->btf_id) {
8204 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
8207 arg_btf_id = compatible->btf_id;
8210 if (meta->func_id == BPF_FUNC_kptr_xchg) {
8211 if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
8214 if (arg_btf_id == BPF_PTR_POISON) {
8215 verbose(env, "verifier internal error:");
8216 verbose(env, "R%d has non-overwritten BPF_PTR_POISON type\n",
8221 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
8222 btf_vmlinux, *arg_btf_id,
8223 strict_type_match)) {
8224 verbose(env, "R%d is of type %s but %s is expected\n",
8225 regno, btf_type_name(reg->btf, reg->btf_id),
8226 btf_type_name(btf_vmlinux, *arg_btf_id));
8232 case PTR_TO_BTF_ID | MEM_ALLOC:
8233 if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
8234 meta->func_id != BPF_FUNC_kptr_xchg) {
8235 verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
8238 if (meta->func_id == BPF_FUNC_kptr_xchg) {
8239 if (map_kptr_match_type(env, meta->kptr_field, reg, regno))
8243 case PTR_TO_BTF_ID | MEM_PERCPU:
8244 case PTR_TO_BTF_ID | MEM_PERCPU | PTR_TRUSTED:
8245 /* Handled by helper specific checks */
8248 verbose(env, "verifier internal error: invalid PTR_TO_BTF_ID register for type match\n");
8254 static struct btf_field *
8255 reg_find_field_offset(const struct bpf_reg_state *reg, s32 off, u32 fields)
8257 struct btf_field *field;
8258 struct btf_record *rec;
8260 rec = reg_btf_record(reg);
8264 field = btf_record_find(rec, off, fields);
8271 int check_func_arg_reg_off(struct bpf_verifier_env *env,
8272 const struct bpf_reg_state *reg, int regno,
8273 enum bpf_arg_type arg_type)
8275 u32 type = reg->type;
8277 /* When referenced register is passed to release function, its fixed
8280 * We will check that a reg passed to an arg_type_is_release argument
8281 * has a ref_obj_id when storing meta->release_regno.
8283 if (arg_type_is_release(arg_type)) {
8284 /* ARG_PTR_TO_DYNPTR with OBJ_RELEASE is a bit special, as it
8285 * may not directly point to the object being released, but to
8286 * dynptr pointing to such object, which might be at some offset
8287 * on the stack. In that case, we simply fall back to the
8290 if (arg_type_is_dynptr(arg_type) && type == PTR_TO_STACK)
8293 /* Doing check_ptr_off_reg check for the offset will catch this
8294 * because fixed_off_ok is false, but checking here allows us
8295 * to give the user a better error message.
8298 verbose(env, "R%d must have zero offset when passed to release func or trusted arg to kfunc\n",
8302 return __check_ptr_off_reg(env, reg, regno, false);
8306 /* Pointer types where both fixed and variable offset is explicitly allowed: */
8309 case PTR_TO_PACKET_META:
8310 case PTR_TO_MAP_KEY:
8311 case PTR_TO_MAP_VALUE:
8313 case PTR_TO_MEM | MEM_RDONLY:
8314 case PTR_TO_MEM | MEM_RINGBUF:
8316 case PTR_TO_BUF | MEM_RDONLY:
8319 /* All the rest must be rejected, except PTR_TO_BTF_ID which allows
8323 case PTR_TO_BTF_ID | MEM_ALLOC:
8324 case PTR_TO_BTF_ID | PTR_TRUSTED:
8325 case PTR_TO_BTF_ID | MEM_RCU:
8326 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF:
8327 case PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU:
8328 /* When referenced PTR_TO_BTF_ID is passed to release function,
8329 * its fixed offset must be 0. In the other cases, fixed offset
8330 * can be non-zero. This was already checked above. So pass
8331 * fixed_off_ok as true to allow fixed offset for all other
8332 * cases. var_off always must be 0 for PTR_TO_BTF_ID, hence we
8333 * still need to do checks instead of returning.
8335 return __check_ptr_off_reg(env, reg, regno, true);
8337 return __check_ptr_off_reg(env, reg, regno, false);
8341 static struct bpf_reg_state *get_dynptr_arg_reg(struct bpf_verifier_env *env,
8342 const struct bpf_func_proto *fn,
8343 struct bpf_reg_state *regs)
8345 struct bpf_reg_state *state = NULL;
8348 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++)
8349 if (arg_type_is_dynptr(fn->arg_type[i])) {
8351 verbose(env, "verifier internal error: multiple dynptr args\n");
9354 state = &regs[BPF_REG_1 + i];
8358 verbose(env, "verifier internal error: no dynptr arg found\n");
8363 static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8365 struct bpf_func_state *state = func(env, reg);
8368 if (reg->type == CONST_PTR_TO_DYNPTR)
8370 spi = dynptr_get_spi(env, reg);
8373 return state->stack[spi].spilled_ptr.id;
8376 static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
8378 struct bpf_func_state *state = func(env, reg);
8381 if (reg->type == CONST_PTR_TO_DYNPTR)
8382 return reg->ref_obj_id;
8383 spi = dynptr_get_spi(env, reg);
8386 return state->stack[spi].spilled_ptr.ref_obj_id;
8389 static enum bpf_dynptr_type dynptr_get_type(struct bpf_verifier_env *env,
8390 struct bpf_reg_state *reg)
8392 struct bpf_func_state *state = func(env, reg);
8395 if (reg->type == CONST_PTR_TO_DYNPTR)
8396 return reg->dynptr.type;
8398 spi = __get_spi(reg->off);
8400 verbose(env, "verifier internal error: invalid spi when querying dynptr type\n");
8401 return BPF_DYNPTR_TYPE_INVALID;
8404 return state->stack[spi].spilled_ptr.dynptr.type;
8407 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
8408 struct bpf_call_arg_meta *meta,
8409 const struct bpf_func_proto *fn,
8412 u32 regno = BPF_REG_1 + arg;
8413 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
8414 enum bpf_arg_type arg_type = fn->arg_type[arg];
8415 enum bpf_reg_type type = reg->type;
8416 u32 *arg_btf_id = NULL;
8419 if (arg_type == ARG_DONTCARE)
8422 err = check_reg_arg(env, regno, SRC_OP);
8426 if (arg_type == ARG_ANYTHING) {
8427 if (is_pointer_value(env, regno)) {
8428 verbose(env, "R%d leaks addr into helper function\n",
8435 if (type_is_pkt_pointer(type) &&
8436 !may_access_direct_pkt_data(env, meta, BPF_READ)) {
8437 verbose(env, "helper access to the packet is not allowed\n");
8441 if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE) {
8442 err = resolve_map_arg_type(env, meta, &arg_type);
8447 if (register_is_null(reg) && type_may_be_null(arg_type))
8448 /* A NULL register has a SCALAR_VALUE type, so skip
8451 goto skip_type_check;
8453 /* arg_btf_id and arg_size are in a union. */
8454 if (base_type(arg_type) == ARG_PTR_TO_BTF_ID ||
8455 base_type(arg_type) == ARG_PTR_TO_SPIN_LOCK)
8456 arg_btf_id = fn->arg_btf_id[arg];
8458 err = check_reg_type(env, regno, arg_type, arg_btf_id, meta);
8462 err = check_func_arg_reg_off(env, reg, regno, arg_type);
8467 if (arg_type_is_release(arg_type)) {
8468 if (arg_type_is_dynptr(arg_type)) {
8469 struct bpf_func_state *state = func(env, reg);
8472 /* Only dynptr created on stack can be released, thus
8473 * the get_spi and stack state checks for spilled_ptr
8474 * should only be done before process_dynptr_func for
8477 if (reg->type == PTR_TO_STACK) {
8478 spi = dynptr_get_spi(env, reg);
8479 if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
8480 verbose(env, "arg %d is an unacquired reference\n", regno);
8484 verbose(env, "cannot release unowned const bpf_dynptr\n");
8487 } else if (!reg->ref_obj_id && !register_is_null(reg)) {
8488 verbose(env, "R%d must be referenced when passed to release function\n",
8492 if (meta->release_regno) {
8493 verbose(env, "verifier internal error: more than one release argument\n");
8496 meta->release_regno = regno;
8499 if (reg->ref_obj_id) {
8500 if (meta->ref_obj_id) {
8501 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
8502 regno, reg->ref_obj_id,
8506 meta->ref_obj_id = reg->ref_obj_id;
8509 switch (base_type(arg_type)) {
8510 case ARG_CONST_MAP_PTR:
8511 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
8512 if (meta->map_ptr) {
8513 /* Use map_uid (which is unique id of inner map) to reject:
8514 * inner_map1 = bpf_map_lookup_elem(outer_map, key1)
8515 * inner_map2 = bpf_map_lookup_elem(outer_map, key2)
8516 * if (inner_map1 && inner_map2) {
8517 * timer = bpf_map_lookup_elem(inner_map1);
8519 * // mismatch would have been allowed
8520 * bpf_timer_init(timer, inner_map2);
8523 * Comparing map_ptr alone is not enough to distinguish different inner maps.
8525 if (meta->map_ptr != reg->map_ptr ||
8526 meta->map_uid != reg->map_uid) {
8528 "timer pointer in R1 map_uid=%d doesn't match map pointer in R2 map_uid=%d\n",
8529 meta->map_uid, reg->map_uid);
8533 meta->map_ptr = reg->map_ptr;
8534 meta->map_uid = reg->map_uid;
8536 case ARG_PTR_TO_MAP_KEY:
8537 /* bpf_map_xxx(..., map_ptr, ..., key) call:
8538 * check that [key, key + map->key_size) are within
8539 * stack limits and initialized
8541 if (!meta->map_ptr) {
8542 /* in function declaration map_ptr must come before
8543 * map_key, so that it's verified and known before
8544 * we have to check map_key here. Otherwise it means
8545 * that kernel subsystem misconfigured verifier
8547 verbose(env, "invalid map_ptr to access map->key\n");
8550 err = check_helper_mem_access(env, regno,
8551 meta->map_ptr->key_size, false,
8554 case ARG_PTR_TO_MAP_VALUE:
8555 if (type_may_be_null(arg_type) && register_is_null(reg))
8558 /* bpf_map_xxx(..., map_ptr, ..., value) call:
8559 * check [value, value + map->value_size) validity
8561 if (!meta->map_ptr) {
8562 /* kernel subsystem misconfigured verifier */
8563 verbose(env, "invalid map_ptr to access map->value\n");
8566 meta->raw_mode = arg_type & MEM_UNINIT;
8567 err = check_helper_mem_access(env, regno,
8568 meta->map_ptr->value_size, false,
8571 case ARG_PTR_TO_PERCPU_BTF_ID:
8573 verbose(env, "Helper has invalid btf_id in R%d\n", regno);
8576 meta->ret_btf = reg->btf;
8577 meta->ret_btf_id = reg->btf_id;
8579 case ARG_PTR_TO_SPIN_LOCK:
8580 if (in_rbtree_lock_required_cb(env)) {
8581 verbose(env, "can't spin_{lock,unlock} in rbtree cb\n");
8584 if (meta->func_id == BPF_FUNC_spin_lock) {
8585 err = process_spin_lock(env, regno, true);
8588 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
8589 err = process_spin_lock(env, regno, false);
8593 verbose(env, "verifier internal error\n");
8597 case ARG_PTR_TO_TIMER:
8598 err = process_timer_func(env, regno, meta);
8602 case ARG_PTR_TO_FUNC:
8603 meta->subprogno = reg->subprogno;
8605 case ARG_PTR_TO_MEM:
8606 /* The access to this pointer is only checked when we hit the
8607 * next is_mem_size argument below.
8609 meta->raw_mode = arg_type & MEM_UNINIT;
8610 if (arg_type & MEM_FIXED_SIZE) {
8611 err = check_helper_mem_access(env, regno,
8612 fn->arg_size[arg], false,
8616 case ARG_CONST_SIZE:
8617 err = check_mem_size_reg(env, reg, regno, false, meta);
8619 case ARG_CONST_SIZE_OR_ZERO:
8620 err = check_mem_size_reg(env, reg, regno, true, meta);
8622 case ARG_PTR_TO_DYNPTR:
8623 err = process_dynptr_func(env, regno, insn_idx, arg_type, 0);
8627 case ARG_CONST_ALLOC_SIZE_OR_ZERO:
8628 if (!tnum_is_const(reg->var_off)) {
8629 verbose(env, "R%d is not a known constant\n",
8633 meta->mem_size = reg->var_off.value;
8634 err = mark_chain_precision(env, regno);
8638 case ARG_PTR_TO_INT:
8639 case ARG_PTR_TO_LONG:
8641 int size = int_ptr_type_to_size(arg_type);
8643 err = check_helper_mem_access(env, regno, size, false, meta);
8646 err = check_ptr_alignment(env, reg, 0, size, true);
8649 case ARG_PTR_TO_CONST_STR:
8651 struct bpf_map *map = reg->map_ptr;
8656 if (!bpf_map_is_rdonly(map)) {
8657 verbose(env, "R%d does not point to a read-only map\n", regno);
8661 if (!tnum_is_const(reg->var_off)) {
8662 verbose(env, "R%d is not a constant address\n", regno);
8666 if (!map->ops->map_direct_value_addr) {
8667 verbose(env, "no direct value access support for this map type\n");
8671 err = check_map_access(env, regno, reg->off,
8672 map->value_size - reg->off, false,
8677 map_off = reg->off + reg->var_off.value;
8678 err = map->ops->map_direct_value_addr(map, &map_addr, map_off);
8680 verbose(env, "direct value access on string failed\n");
8684 str_ptr = (char *)(long)(map_addr);
8685 if (!strnchr(str_ptr + map_off, map->value_size - map_off, 0)) {
8686 verbose(env, "string is not zero-terminated\n");
8691 case ARG_PTR_TO_KPTR:
8692 err = process_kptr_func(env, regno, meta);
8701 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
8703 enum bpf_attach_type eatype = env->prog->expected_attach_type;
8704 enum bpf_prog_type type = resolve_prog_type(env->prog);
8706 if (func_id != BPF_FUNC_map_update_elem)
8709 /* It's not possible to get access to a locked struct sock in these
8710 * contexts, so updating is safe.
8713 case BPF_PROG_TYPE_TRACING:
8714 if (eatype == BPF_TRACE_ITER)
8717 case BPF_PROG_TYPE_SOCKET_FILTER:
8718 case BPF_PROG_TYPE_SCHED_CLS:
8719 case BPF_PROG_TYPE_SCHED_ACT:
8720 case BPF_PROG_TYPE_XDP:
8721 case BPF_PROG_TYPE_SK_REUSEPORT:
8722 case BPF_PROG_TYPE_FLOW_DISSECTOR:
8723 case BPF_PROG_TYPE_SK_LOOKUP:
8729 verbose(env, "cannot update sockmap in this context\n");
8733 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
8735 return env->prog->jit_requested &&
8736 bpf_jit_supports_subprog_tailcalls();
8739 static int check_map_func_compatibility(struct bpf_verifier_env *env,
8740 struct bpf_map *map, int func_id)
8745 /* We need a two way check, first is from map perspective ... */
8746 switch (map->map_type) {
8747 case BPF_MAP_TYPE_PROG_ARRAY:
8748 if (func_id != BPF_FUNC_tail_call)
8751 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
8752 if (func_id != BPF_FUNC_perf_event_read &&
8753 func_id != BPF_FUNC_perf_event_output &&
8754 func_id != BPF_FUNC_skb_output &&
8755 func_id != BPF_FUNC_perf_event_read_value &&
8756 func_id != BPF_FUNC_xdp_output)
8759 case BPF_MAP_TYPE_RINGBUF:
8760 if (func_id != BPF_FUNC_ringbuf_output &&
8761 func_id != BPF_FUNC_ringbuf_reserve &&
8762 func_id != BPF_FUNC_ringbuf_query &&
8763 func_id != BPF_FUNC_ringbuf_reserve_dynptr &&
8764 func_id != BPF_FUNC_ringbuf_submit_dynptr &&
8765 func_id != BPF_FUNC_ringbuf_discard_dynptr)
8768 case BPF_MAP_TYPE_USER_RINGBUF:
8769 if (func_id != BPF_FUNC_user_ringbuf_drain)
8772 case BPF_MAP_TYPE_STACK_TRACE:
8773 if (func_id != BPF_FUNC_get_stackid)
8776 case BPF_MAP_TYPE_CGROUP_ARRAY:
8777 if (func_id != BPF_FUNC_skb_under_cgroup &&
8778 func_id != BPF_FUNC_current_task_under_cgroup)
8781 case BPF_MAP_TYPE_CGROUP_STORAGE:
8782 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
8783 if (func_id != BPF_FUNC_get_local_storage)
8786 case BPF_MAP_TYPE_DEVMAP:
8787 case BPF_MAP_TYPE_DEVMAP_HASH:
8788 if (func_id != BPF_FUNC_redirect_map &&
8789 func_id != BPF_FUNC_map_lookup_elem)
8792 /* Restrict bpf side of cpumap and xskmap, open when use-cases
8795 case BPF_MAP_TYPE_CPUMAP:
8796 if (func_id != BPF_FUNC_redirect_map)
8799 case BPF_MAP_TYPE_XSKMAP:
8800 if (func_id != BPF_FUNC_redirect_map &&
8801 func_id != BPF_FUNC_map_lookup_elem)
8804 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
8805 case BPF_MAP_TYPE_HASH_OF_MAPS:
8806 if (func_id != BPF_FUNC_map_lookup_elem)
8809 case BPF_MAP_TYPE_SOCKMAP:
8810 if (func_id != BPF_FUNC_sk_redirect_map &&
8811 func_id != BPF_FUNC_sock_map_update &&
8812 func_id != BPF_FUNC_map_delete_elem &&
8813 func_id != BPF_FUNC_msg_redirect_map &&
8814 func_id != BPF_FUNC_sk_select_reuseport &&
8815 func_id != BPF_FUNC_map_lookup_elem &&
8816 !may_update_sockmap(env, func_id))
8819 case BPF_MAP_TYPE_SOCKHASH:
8820 if (func_id != BPF_FUNC_sk_redirect_hash &&
8821 func_id != BPF_FUNC_sock_hash_update &&
8822 func_id != BPF_FUNC_map_delete_elem &&
8823 func_id != BPF_FUNC_msg_redirect_hash &&
8824 func_id != BPF_FUNC_sk_select_reuseport &&
8825 func_id != BPF_FUNC_map_lookup_elem &&
8826 !may_update_sockmap(env, func_id))
8829 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
8830 if (func_id != BPF_FUNC_sk_select_reuseport)
8833 case BPF_MAP_TYPE_QUEUE:
8834 case BPF_MAP_TYPE_STACK:
8835 if (func_id != BPF_FUNC_map_peek_elem &&
8836 func_id != BPF_FUNC_map_pop_elem &&
8837 func_id != BPF_FUNC_map_push_elem)
8840 case BPF_MAP_TYPE_SK_STORAGE:
8841 if (func_id != BPF_FUNC_sk_storage_get &&
8842 func_id != BPF_FUNC_sk_storage_delete &&
8843 func_id != BPF_FUNC_kptr_xchg)
8846 case BPF_MAP_TYPE_INODE_STORAGE:
8847 if (func_id != BPF_FUNC_inode_storage_get &&
8848 func_id != BPF_FUNC_inode_storage_delete &&
8849 func_id != BPF_FUNC_kptr_xchg)
8852 case BPF_MAP_TYPE_TASK_STORAGE:
8853 if (func_id != BPF_FUNC_task_storage_get &&
8854 func_id != BPF_FUNC_task_storage_delete &&
8855 func_id != BPF_FUNC_kptr_xchg)
8858 case BPF_MAP_TYPE_CGRP_STORAGE:
8859 if (func_id != BPF_FUNC_cgrp_storage_get &&
8860 func_id != BPF_FUNC_cgrp_storage_delete &&
8861 func_id != BPF_FUNC_kptr_xchg)
8864 case BPF_MAP_TYPE_BLOOM_FILTER:
8865 if (func_id != BPF_FUNC_map_peek_elem &&
8866 func_id != BPF_FUNC_map_push_elem)
8873 /* ... and second from the function itself. */
8875 case BPF_FUNC_tail_call:
8876 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
8878 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
8879 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
8883 case BPF_FUNC_perf_event_read:
8884 case BPF_FUNC_perf_event_output:
8885 case BPF_FUNC_perf_event_read_value:
8886 case BPF_FUNC_skb_output:
8887 case BPF_FUNC_xdp_output:
8888 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
8891 case BPF_FUNC_ringbuf_output:
8892 case BPF_FUNC_ringbuf_reserve:
8893 case BPF_FUNC_ringbuf_query:
8894 case BPF_FUNC_ringbuf_reserve_dynptr:
8895 case BPF_FUNC_ringbuf_submit_dynptr:
8896 case BPF_FUNC_ringbuf_discard_dynptr:
8897 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
8900 case BPF_FUNC_user_ringbuf_drain:
8901 if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
8904 case BPF_FUNC_get_stackid:
8905 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
8908 case BPF_FUNC_current_task_under_cgroup:
8909 case BPF_FUNC_skb_under_cgroup:
8910 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
8913 case BPF_FUNC_redirect_map:
8914 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
8915 map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
8916 map->map_type != BPF_MAP_TYPE_CPUMAP &&
8917 map->map_type != BPF_MAP_TYPE_XSKMAP)
8920 case BPF_FUNC_sk_redirect_map:
8921 case BPF_FUNC_msg_redirect_map:
8922 case BPF_FUNC_sock_map_update:
8923 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
8926 case BPF_FUNC_sk_redirect_hash:
8927 case BPF_FUNC_msg_redirect_hash:
8928 case BPF_FUNC_sock_hash_update:
8929 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
8932 case BPF_FUNC_get_local_storage:
8933 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
8934 map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
8937 case BPF_FUNC_sk_select_reuseport:
8938 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
8939 map->map_type != BPF_MAP_TYPE_SOCKMAP &&
8940 map->map_type != BPF_MAP_TYPE_SOCKHASH)
8943 case BPF_FUNC_map_pop_elem:
8944 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8945 map->map_type != BPF_MAP_TYPE_STACK)
8948 case BPF_FUNC_map_peek_elem:
8949 case BPF_FUNC_map_push_elem:
8950 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
8951 map->map_type != BPF_MAP_TYPE_STACK &&
8952 map->map_type != BPF_MAP_TYPE_BLOOM_FILTER)
8955 case BPF_FUNC_map_lookup_percpu_elem:
8956 if (map->map_type != BPF_MAP_TYPE_PERCPU_ARRAY &&
8957 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
8958 map->map_type != BPF_MAP_TYPE_LRU_PERCPU_HASH)
8961 case BPF_FUNC_sk_storage_get:
8962 case BPF_FUNC_sk_storage_delete:
8963 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
8966 case BPF_FUNC_inode_storage_get:
8967 case BPF_FUNC_inode_storage_delete:
8968 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
8971 case BPF_FUNC_task_storage_get:
8972 case BPF_FUNC_task_storage_delete:
8973 if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE)
8976 case BPF_FUNC_cgrp_storage_get:
8977 case BPF_FUNC_cgrp_storage_delete:
8978 if (map->map_type != BPF_MAP_TYPE_CGRP_STORAGE)
8987 verbose(env, "cannot pass map_type %d into func %s#%d\n",
8988 map->map_type, func_id_name(func_id), func_id);
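/* Illustrative rejection (sketch, not from this file; map name assumed):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_HASH);
 *		...
 *	} jmp_table SEC(".maps");
 *
 *	bpf_tail_call(ctx, &jmp_table, 0);
 *
 * passes the map-side switch (hash maps place no restriction there) but
 * fails the function-side check, since bpf_tail_call() requires
 * BPF_MAP_TYPE_PROG_ARRAY, and ends up in the error path above.
 */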
8992 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
8996 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
8998 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
9000 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
9002 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
9004 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
9007 /* We only support one arg being in raw mode at the moment,
9008 * which is sufficient for the helper functions we have
9014 static bool check_args_pair_invalid(const struct bpf_func_proto *fn, int arg)
9016 bool is_fixed = fn->arg_type[arg] & MEM_FIXED_SIZE;
9017 bool has_size = fn->arg_size[arg] != 0;
9018 bool is_next_size = false;
9020 if (arg + 1 < ARRAY_SIZE(fn->arg_type))
9021 is_next_size = arg_type_is_mem_size(fn->arg_type[arg + 1]);
9023 if (base_type(fn->arg_type[arg]) != ARG_PTR_TO_MEM)
9024 return is_next_size;
9026 return has_size == is_next_size || is_next_size == is_fixed;
9029 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
9031 /* bpf_xxx(..., buf, len) call will access 'len'
9032 * bytes from memory 'buf'. Both arg types need
9033 * to be paired, so make sure there's no buggy
9034 * helper function specification.
9036 if (arg_type_is_mem_size(fn->arg1_type) ||
9037 check_args_pair_invalid(fn, 0) ||
9038 check_args_pair_invalid(fn, 1) ||
9039 check_args_pair_invalid(fn, 2) ||
9040 check_args_pair_invalid(fn, 3) ||
9041 check_args_pair_invalid(fn, 4))
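/* Illustrative proto shape this enforces (assumed example following the
 * common helper definition pattern, e.g. a probe-read style helper):
 *
 *	.arg1_type = ARG_PTR_TO_UNINIT_MEM,	// buffer ...
 *	.arg2_type = ARG_CONST_SIZE_OR_ZERO,	// ... sized by this next arg
 *	.arg3_type = ARG_ANYTHING,
 *
 * A proto whose ARG_CONST_SIZE* argument is not immediately preceded by a
 * matching memory argument is rejected here as a misconfigured helper.
 */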
9047 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
9051 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
9052 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_BTF_ID)
9053 return !!fn->arg_btf_id[i];
9054 if (base_type(fn->arg_type[i]) == ARG_PTR_TO_SPIN_LOCK)
9055 return fn->arg_btf_id[i] == BPF_PTR_POISON;
9056 if (base_type(fn->arg_type[i]) != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i] &&
9057 /* arg_btf_id and arg_size are in a union. */
9058 (base_type(fn->arg_type[i]) != ARG_PTR_TO_MEM ||
9059 !(fn->arg_type[i] & MEM_FIXED_SIZE)))
9066 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
9068 return check_raw_mode_ok(fn) &&
9069 check_arg_pair_ok(fn) &&
9070 check_btf_id_ok(fn) ? 0 : -EINVAL;
9073 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
9074 * are now invalid, so turn them into unknown SCALAR_VALUE.
9076 * This also applies to dynptr slices belonging to skb and xdp dynptrs,
9077 * since these slices point to packet data.
9079 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
9081 struct bpf_func_state *state;
9082 struct bpf_reg_state *reg;
9084 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9085 if (reg_is_pkt_pointer_any(reg) || reg_is_dynptr_slice_pkt(reg))
9086 mark_reg_invalid(env, reg);
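/* Illustrative BPF-side consequence (sketch, not from this file): after a
 * helper that may change packet data, previously verified packet pointers
 * must be re-derived and re-checked:
 *
 *	void *data = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *	struct ethhdr *eth = data;
 *
 *	if (data + sizeof(*eth) > data_end)
 *		return TC_ACT_SHOT;
 *	bpf_skb_pull_data(skb, 64);	// changes_data helper
 *	// eth/data/data_end are now unknown scalars; reload and re-check them
 *	data = (void *)(long)skb->data;
 *	data_end = (void *)(long)skb->data_end;
 */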
9092 BEYOND_PKT_END = -2,
9095 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
9097 struct bpf_func_state *state = vstate->frame[vstate->curframe];
9098 struct bpf_reg_state *reg = &state->regs[regn];
9100 if (reg->type != PTR_TO_PACKET)
9101 /* PTR_TO_PACKET_META is not supported yet */
9104 /* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
9105 * How far beyond pkt_end it goes is unknown.
9106 * if (!range_open) it's the case of pkt >= pkt_end
9107 * if (range_open) it's the case of pkt > pkt_end
9108 * hence this pointer is at least 1 byte bigger than pkt_end
9111 reg->range = BEYOND_PKT_END;
9113 reg->range = AT_PKT_END;
9116 /* The pointer with the specified id has released its reference to kernel
9117 * resources. Identify all copies of the same pointer and clear the reference.
9119 static int release_reference(struct bpf_verifier_env *env,
9122 struct bpf_func_state *state;
9123 struct bpf_reg_state *reg;
9126 err = release_reference_state(cur_func(env), ref_obj_id);
9130 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
9131 if (reg->ref_obj_id == ref_obj_id)
9132 mark_reg_invalid(env, reg);
9138 static void invalidate_non_owning_refs(struct bpf_verifier_env *env)
9140 struct bpf_func_state *unused;
9141 struct bpf_reg_state *reg;
9143 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
9144 if (type_is_non_owning_ref(reg->type))
9145 mark_reg_invalid(env, reg);
9149 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
9150 struct bpf_reg_state *regs)
9154 /* after the call registers r0 - r5 were scratched */
9155 for (i = 0; i < CALLER_SAVED_REGS; i++) {
9156 mark_reg_not_init(env, regs, caller_saved[i]);
9157 __check_reg_arg(env, regs, caller_saved[i], DST_OP_NO_MARK);
9161 typedef int (*set_callee_state_fn)(struct bpf_verifier_env *env,
9162 struct bpf_func_state *caller,
9163 struct bpf_func_state *callee,
9166 static int set_callee_state(struct bpf_verifier_env *env,
9167 struct bpf_func_state *caller,
9168 struct bpf_func_state *callee, int insn_idx);
9170 static int __check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9171 int *insn_idx, int subprog,
9172 set_callee_state_fn set_callee_state_cb)
9174 struct bpf_verifier_state *state = env->cur_state;
9175 struct bpf_func_state *caller, *callee;
9178 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
9179 verbose(env, "the call stack of %d frames is too deep\n",
9180 state->curframe + 2);
9184 caller = state->frame[state->curframe];
9185 if (state->frame[state->curframe + 1]) {
9186 verbose(env, "verifier bug. Frame %d already allocated\n",
9187 state->curframe + 1);
9191 err = btf_check_subprog_call(env, subprog, caller->regs);
9194 if (subprog_is_global(env, subprog)) {
9196 verbose(env, "Caller passes invalid args into func#%d\n",
9200 if (env->log.level & BPF_LOG_LEVEL)
9202 "Func#%d is global and valid. Skipping.\n",
9204 clear_caller_saved_regs(env, caller->regs);
9206 /* All global functions return a 64-bit SCALAR_VALUE */
9207 mark_reg_unknown(env, caller->regs, BPF_REG_0);
9208 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9210 /* continue with next insn after call */
9215 /* set_callee_state is used for direct subprog calls, but we are
9216 * interested in validating only BPF helpers that can call subprogs as
9219 if (set_callee_state_cb != set_callee_state) {
9220 if (bpf_pseudo_kfunc_call(insn) &&
9221 !is_callback_calling_kfunc(insn->imm)) {
9222 verbose(env, "verifier bug: kfunc %s#%d not marked as callback-calling\n",
9223 func_id_name(insn->imm), insn->imm);
9225 } else if (!bpf_pseudo_kfunc_call(insn) &&
9226 !is_callback_calling_function(insn->imm)) { /* helper */
9227 verbose(env, "verifier bug: helper %s#%d not marked as callback-calling\n",
9228 func_id_name(insn->imm), insn->imm);
9233 if (insn->code == (BPF_JMP | BPF_CALL) &&
9234 insn->src_reg == 0 &&
9235 insn->imm == BPF_FUNC_timer_set_callback) {
9236 struct bpf_verifier_state *async_cb;
9238 /* there is no real recursion here. timer callbacks are async */
9239 env->subprog_info[subprog].is_async_cb = true;
9240 async_cb = push_async_cb(env, env->subprog_info[subprog].start,
9241 *insn_idx, subprog);
9244 callee = async_cb->frame[0];
9245 callee->async_entry_cnt = caller->async_entry_cnt + 1;
9247 /* Convert bpf_timer_set_callback() args into timer callback args */
9248 err = set_callee_state_cb(env, caller, callee, *insn_idx);
9252 clear_caller_saved_regs(env, caller->regs);
9253 mark_reg_unknown(env, caller->regs, BPF_REG_0);
9254 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
9255 /* continue with next insn after call */
9259 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
9262 state->frame[state->curframe + 1] = callee;
9264 /* callee cannot access r0, r6 - r9 for reading and has to write
9265 * into its own stack before reading from it.
9266 * callee can read/write into caller's stack
9268 init_func_state(env, callee,
9269 /* remember the callsite, it will be used by bpf_exit */
9270 *insn_idx /* callsite */,
9271 state->curframe + 1 /* frameno within this callchain */,
9272 subprog /* subprog number within this prog */);
9274 /* Transfer references to the callee */
9275 err = copy_reference_state(callee, caller);
9279 err = set_callee_state_cb(env, caller, callee, *insn_idx);
9283 clear_caller_saved_regs(env, caller->regs);
9285 /* only increment it after check_reg_arg() finished */
9288 /* and go analyze first insn of the callee */
9289 *insn_idx = env->subprog_info[subprog].start - 1;
9291 if (env->log.level & BPF_LOG_LEVEL) {
9292 verbose(env, "caller:\n");
9293 print_verifier_state(env, caller, true);
9294 verbose(env, "callee:\n");
9295 print_verifier_state(env, callee, true);
9300 free_func_state(callee);
9301 state->frame[state->curframe + 1] = NULL;
9305 int map_set_for_each_callback_args(struct bpf_verifier_env *env,
9306 struct bpf_func_state *caller,
9307 struct bpf_func_state *callee)
9309 /* bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn,
9310 * void *callback_ctx, u64 flags);
9311 * callback_fn(struct bpf_map *map, void *key, void *value,
9312 * void *callback_ctx);
9314 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9316 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9317 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9318 callee->regs[BPF_REG_2].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9320 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9321 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9322 callee->regs[BPF_REG_3].map_ptr = caller->regs[BPF_REG_1].map_ptr;
9324 /* pointer to stack or null */
9325 callee->regs[BPF_REG_4] = caller->regs[BPF_REG_3];
9328 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9332 static int set_callee_state(struct bpf_verifier_env *env,
9333 struct bpf_func_state *caller,
9334 struct bpf_func_state *callee, int insn_idx)
9338 /* copy r1 - r5 args that callee can access. The copy includes parent
9339 * pointers, which connects us up to the liveness chain
9341 for (i = BPF_REG_1; i <= BPF_REG_5; i++)
9342 callee->regs[i] = caller->regs[i];
9346 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9349 int subprog, target_insn;
9351 target_insn = *insn_idx + insn->imm + 1;
9352 subprog = find_subprog(env, target_insn);
9354 verbose(env, "verifier bug. No program starts at insn %d\n",
9359 return __check_func_call(env, insn, insn_idx, subprog, set_callee_state);
9362 static int set_map_elem_callback_state(struct bpf_verifier_env *env,
9363 struct bpf_func_state *caller,
9364 struct bpf_func_state *callee,
9367 struct bpf_insn_aux_data *insn_aux = &env->insn_aux_data[insn_idx];
9368 struct bpf_map *map;
9371 if (bpf_map_ptr_poisoned(insn_aux)) {
9372 verbose(env, "tail_call abusing map_ptr\n");
9376 map = BPF_MAP_PTR(insn_aux->map_ptr_state);
9377 if (!map->ops->map_set_for_each_callback_args ||
9378 !map->ops->map_for_each_callback) {
9379 verbose(env, "callback function not allowed for map\n");
9383 err = map->ops->map_set_for_each_callback_args(env, caller, callee);
9387 callee->in_callback_fn = true;
9388 callee->callback_ret_range = tnum_range(0, 1);
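/* Illustrative BPF-side usage (sketch, not from this file; map and callback
 * names assumed):
 *
 *	static long count_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *ctx)
 *	{
 *		(*(long *)ctx)++;
 *		return 0;	// 0 = continue, 1 = stop, per callback_ret_range
 *	}
 *
 *	long n = 0;
 *	bpf_for_each_map_elem(&my_array, count_cb, &n, 0);
 *
 * In the callback, R2/R3 carry PTR_TO_MAP_KEY/PTR_TO_MAP_VALUE of 'my_array',
 * as set up by map_set_for_each_callback_args() above.
 */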
9392 static int set_loop_callback_state(struct bpf_verifier_env *env,
9393 struct bpf_func_state *caller,
9394 struct bpf_func_state *callee,
9397 /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
9399 * callback_fn(u32 index, void *callback_ctx);
9401 callee->regs[BPF_REG_1].type = SCALAR_VALUE;
9402 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9405 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9406 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9407 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9409 callee->in_callback_fn = true;
9410 callee->callback_ret_range = tnum_range(0, 1);
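/* Illustrative bpf_loop() usage (sketch, not from this file; callback name
 * assumed):
 *
 *	static long step_cb(__u32 index, void *ctx)
 *	{
 *		*(__u64 *)ctx += index;
 *		return 0;	// non-zero stops the loop early
 *	}
 *
 *	__u64 sum = 0;
 *	bpf_loop(1000, step_cb, &sum, 0);
 *
 * The callback sees R1 as an unknown scalar (the index) and R2 as the
 * callback_ctx passed in R3 of the bpf_loop() call, matching the setup above.
 */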
9414 static int set_timer_callback_state(struct bpf_verifier_env *env,
9415 struct bpf_func_state *caller,
9416 struct bpf_func_state *callee,
9419 struct bpf_map *map_ptr = caller->regs[BPF_REG_1].map_ptr;
9421 /* bpf_timer_set_callback(struct bpf_timer *timer, void *callback_fn);
9422 * callback_fn(struct bpf_map *map, void *key, void *value);
9424 callee->regs[BPF_REG_1].type = CONST_PTR_TO_MAP;
9425 __mark_reg_known_zero(&callee->regs[BPF_REG_1]);
9426 callee->regs[BPF_REG_1].map_ptr = map_ptr;
9428 callee->regs[BPF_REG_2].type = PTR_TO_MAP_KEY;
9429 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9430 callee->regs[BPF_REG_2].map_ptr = map_ptr;
9432 callee->regs[BPF_REG_3].type = PTR_TO_MAP_VALUE;
9433 __mark_reg_known_zero(&callee->regs[BPF_REG_3]);
9434 callee->regs[BPF_REG_3].map_ptr = map_ptr;
9437 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9438 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9439 callee->in_async_callback_fn = true;
9440 callee->callback_ret_range = tnum_range(0, 1);
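/* Illustrative bpf_timer flow (sketch, not from this file; map/element names
 * assumed): the callback receives the map, key and value of the element
 * embedding the timer, exactly as the registers are set up above:
 *
 *	struct elem { struct bpf_timer t; };
 *	static int timer_cb(void *map, int *key, struct elem *val) { return 0; }
 *
 *	struct elem *e = bpf_map_lookup_elem(&array, &key);
 *	if (!e)
 *		return 0;
 *	bpf_timer_init(&e->t, &array, CLOCK_MONOTONIC);
 *	bpf_timer_set_callback(&e->t, timer_cb);
 *	bpf_timer_start(&e->t, 1000, 0);
 */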
9444 static int set_find_vma_callback_state(struct bpf_verifier_env *env,
9445 struct bpf_func_state *caller,
9446 struct bpf_func_state *callee,
9449 /* bpf_find_vma(struct task_struct *task, u64 addr,
9450 * void *callback_fn, void *callback_ctx, u64 flags)
9451 * (callback_fn)(struct task_struct *task,
9452 * struct vm_area_struct *vma, void *callback_ctx);
9454 callee->regs[BPF_REG_1] = caller->regs[BPF_REG_1];
9456 callee->regs[BPF_REG_2].type = PTR_TO_BTF_ID;
9457 __mark_reg_known_zero(&callee->regs[BPF_REG_2]);
9458 callee->regs[BPF_REG_2].btf = btf_vmlinux;
9459 callee->regs[BPF_REG_2].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA],
9461 /* pointer to stack or null */
9462 callee->regs[BPF_REG_3] = caller->regs[BPF_REG_4];
9465 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9466 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9467 callee->in_callback_fn = true;
9468 callee->callback_ret_range = tnum_range(0, 1);
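/* Illustrative bpf_find_vma() usage (sketch, not from this file; 'addr' is
 * some address of interest, callback name assumed):
 *
 *	static long vma_cb(struct task_struct *task, struct vm_area_struct *vma,
 *			   void *ctx)
 *	{
 *		*(unsigned long *)ctx = vma->vm_start;
 *		return 0;
 *	}
 *
 *	unsigned long start = 0;
 *	struct task_struct *task = bpf_get_current_task_btf();
 *	bpf_find_vma(task, addr, vma_cb, &start, 0);
 */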
9472 static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
9473 struct bpf_func_state *caller,
9474 struct bpf_func_state *callee,
9477 /* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn,
9478 * void *callback_ctx, u64 flags);
9479 * callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
9481 __mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
9482 mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
9483 callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
9486 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9487 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9488 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9490 callee->in_callback_fn = true;
9491 callee->callback_ret_range = tnum_range(0, 1);
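/* Illustrative bpf_user_ringbuf_drain() usage (sketch, not from this file;
 * map, message struct and callback names assumed):
 *
 *	static long drain_cb(struct bpf_dynptr *dynptr, void *ctx)
 *	{
 *		struct my_msg msg;
 *
 *		if (bpf_dynptr_read(&msg, sizeof(msg), dynptr, 0, 0))
 *			return 1;	// stop draining
 *		return 0;		// continue with the next sample
 *	}
 *
 *	bpf_user_ringbuf_drain(&user_rb, drain_cb, NULL, 0);
 *
 * R1 of the callback is the dynptr marked above via mark_dynptr_cb_reg().
 */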
9495 static int set_rbtree_add_callback_state(struct bpf_verifier_env *env,
9496 struct bpf_func_state *caller,
9497 struct bpf_func_state *callee,
9500 /* void bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
9501 * bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b));
9503 * 'struct bpf_rb_node *node' arg to bpf_rbtree_add_impl is the same PTR_TO_BTF_ID w/ offset
9504 * that 'less' callback args will be receiving. However, 'node' arg was release_reference'd
9505 * by this point, so look at 'root'
9507 struct btf_field *field;
9509 field = reg_find_field_offset(&caller->regs[BPF_REG_1], caller->regs[BPF_REG_1].off,
9511 if (!field || !field->graph_root.value_btf_id)
9514 mark_reg_graph_node(callee->regs, BPF_REG_1, &field->graph_root);
9515 ref_set_non_owning(env, &callee->regs[BPF_REG_1]);
9516 mark_reg_graph_node(callee->regs, BPF_REG_2, &field->graph_root);
9517 ref_set_non_owning(env, &callee->regs[BPF_REG_2]);
9519 __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
9520 __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
9521 __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
9522 callee->in_callback_fn = true;
9523 callee->callback_ret_range = tnum_range(0, 1);
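/* Illustrative bpf_rbtree_add() usage (sketch, not from this file; node
 * struct, root and lock names assumed; programs call the kfunc through a
 * wrapper that expands to bpf_rbtree_add_impl()):
 *
 *	struct node_data { struct bpf_rb_node node; long key; };
 *
 *	static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
 *	{
 *		struct node_data *na = container_of(a, struct node_data, node);
 *		struct node_data *nb = container_of(b, struct node_data, node);
 *
 *		return na->key < nb->key;
 *	}
 *
 *	bpf_spin_lock(&glock);
 *	bpf_rbtree_add(&groot, &n->node, less);
 *	bpf_spin_unlock(&glock);
 *
 * Both callback args become non-owning references into the same graph, as
 * set up via mark_reg_graph_node()/ref_set_non_owning() above.
 */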
9527 static bool is_rbtree_lock_required_kfunc(u32 btf_id);
9529 /* Are we currently verifying the callback for a rbtree helper that must
9530 * be called with lock held? If so, no need to complain about unreleased
9533 static bool in_rbtree_lock_required_cb(struct bpf_verifier_env *env)
9535 struct bpf_verifier_state *state = env->cur_state;
9536 struct bpf_insn *insn = env->prog->insnsi;
9537 struct bpf_func_state *callee;
9540 if (!state->curframe)
9543 callee = state->frame[state->curframe];
9545 if (!callee->in_callback_fn)
9548 kfunc_btf_id = insn[callee->callsite].imm;
9549 return is_rbtree_lock_required_kfunc(kfunc_btf_id);
9552 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
9554 struct bpf_verifier_state *state = env->cur_state;
9555 struct bpf_func_state *caller, *callee;
9556 struct bpf_reg_state *r0;
9559 callee = state->frame[state->curframe];
9560 r0 = &callee->regs[BPF_REG_0];
9561 if (r0->type == PTR_TO_STACK) {
9562 /* technically it's ok to return caller's stack pointer
9563 * (or caller's caller's pointer) back to the caller,
9564 * since these pointers are valid. Only current stack
9565 * pointer will be invalid as soon as function exits,
9566 * but let's be conservative
9568 verbose(env, "cannot return stack pointer to the caller\n");
9572 caller = state->frame[state->curframe - 1];
9573 if (callee->in_callback_fn) {
9574 /* enforce R0 return value range [0, 1]. */
9575 struct tnum range = callee->callback_ret_range;
9577 if (r0->type != SCALAR_VALUE) {
9578 verbose(env, "R0 not a scalar value\n");
9582 /* we are going to rely on register's precise value */
9583 err = mark_reg_read(env, r0, r0->parent, REG_LIVE_READ64);
9584 err = err ?: mark_chain_precision(env, BPF_REG_0);
9588 if (!tnum_in(range, r0->var_off)) {
9589 verbose_invalid_scalar(env, r0, &range, "callback return", "R0");
9593 /* return to the caller whatever r0 had in the callee */
9594 caller->regs[BPF_REG_0] = *r0;
9597 /* callback_fn frame should have released its own additions to parent's
9598 * reference state at this point, or check_reference_leak would
9599 * complain, hence it must be the same as the caller. There is no need
9602 if (!callee->in_callback_fn) {
9603 /* Transfer references to the caller */
9604 err = copy_reference_state(caller, callee);
9609 *insn_idx = callee->callsite + 1;
9610 if (env->log.level & BPF_LOG_LEVEL) {
9611 verbose(env, "returning from callee:\n");
9612 print_verifier_state(env, callee, true);
9613 verbose(env, "to caller at %d:\n", *insn_idx);
9614 print_verifier_state(env, caller, true);
9616 /* clear everything in the callee */
9617 free_func_state(callee);
9618 state->frame[state->curframe--] = NULL;
9622 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
9624 struct bpf_call_arg_meta *meta)
9626 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
9628 if (ret_type != RET_INTEGER)
9632 case BPF_FUNC_get_stack:
9633 case BPF_FUNC_get_task_stack:
9634 case BPF_FUNC_probe_read_str:
9635 case BPF_FUNC_probe_read_kernel_str:
9636 case BPF_FUNC_probe_read_user_str:
9637 ret_reg->smax_value = meta->msize_max_value;
9638 ret_reg->s32_max_value = meta->msize_max_value;
9639 ret_reg->smin_value = -MAX_ERRNO;
9640 ret_reg->s32_min_value = -MAX_ERRNO;
9641 reg_bounds_sync(ret_reg);
9643 case BPF_FUNC_get_smp_processor_id:
9644 ret_reg->umax_value = nr_cpu_ids - 1;
9645 ret_reg->u32_max_value = nr_cpu_ids - 1;
9646 ret_reg->smax_value = nr_cpu_ids - 1;
9647 ret_reg->s32_max_value = nr_cpu_ids - 1;
9648 ret_reg->umin_value = 0;
9649 ret_reg->u32_min_value = 0;
9650 ret_reg->smin_value = 0;
9651 ret_reg->s32_min_value = 0;
9652 reg_bounds_sync(ret_reg);
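/* Illustrative effect (sketch, not from this file; map and buffer names
 * assumed): bounding the return of bpf_get_stack() to the passed buffer size
 * lets the result be reused as a length without an extra upper-bound check:
 *
 *	long n = bpf_get_stack(ctx, buf, sizeof(buf), 0);
 *	if (n < 0)
 *		return 0;
 *	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, buf, n);
 */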
9658 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9659 int func_id, int insn_idx)
9661 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9662 struct bpf_map *map = meta->map_ptr;
9664 if (func_id != BPF_FUNC_tail_call &&
9665 func_id != BPF_FUNC_map_lookup_elem &&
9666 func_id != BPF_FUNC_map_update_elem &&
9667 func_id != BPF_FUNC_map_delete_elem &&
9668 func_id != BPF_FUNC_map_push_elem &&
9669 func_id != BPF_FUNC_map_pop_elem &&
9670 func_id != BPF_FUNC_map_peek_elem &&
9671 func_id != BPF_FUNC_for_each_map_elem &&
9672 func_id != BPF_FUNC_redirect_map &&
9673 func_id != BPF_FUNC_map_lookup_percpu_elem)
9677 verbose(env, "kernel subsystem misconfigured verifier\n");
9681 /* In case of read-only, some additional restrictions
9682 * need to be applied in order to prevent altering the
9683 * state of the map from program side.
9685 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
9686 (func_id == BPF_FUNC_map_delete_elem ||
9687 func_id == BPF_FUNC_map_update_elem ||
9688 func_id == BPF_FUNC_map_push_elem ||
9689 func_id == BPF_FUNC_map_pop_elem)) {
9690 verbose(env, "write into map forbidden\n");
9694 if (!BPF_MAP_PTR(aux->map_ptr_state))
9695 bpf_map_ptr_store(aux, meta->map_ptr,
9696 !meta->map_ptr->bypass_spec_v1);
9697 else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
9698 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
9699 !meta->map_ptr->bypass_spec_v1);
9704 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
9705 int func_id, int insn_idx)
9707 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
9708 struct bpf_reg_state *regs = cur_regs(env), *reg;
9709 struct bpf_map *map = meta->map_ptr;
9713 if (func_id != BPF_FUNC_tail_call)
9715 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
9716 verbose(env, "kernel subsystem misconfigured verifier\n");
9720 reg = &regs[BPF_REG_3];
9721 val = reg->var_off.value;
9722 max = map->max_entries;
9724 if (!(register_is_const(reg) && val < max)) {
9725 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9729 err = mark_chain_precision(env, BPF_REG_3);
9732 if (bpf_map_key_unseen(aux))
9733 bpf_map_key_store(aux, val);
9734 else if (!bpf_map_key_poisoned(aux) &&
9735 bpf_map_key_immediate(aux) != val)
9736 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
9740 static int check_reference_leak(struct bpf_verifier_env *env)
9742 struct bpf_func_state *state = cur_func(env);
9743 bool refs_lingering = false;
9746 if (state->frameno && !state->in_callback_fn)
9749 for (i = 0; i < state->acquired_refs; i++) {
9750 if (state->in_callback_fn && state->refs[i].callback_ref != state->frameno)
9752 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
9753 state->refs[i].id, state->refs[i].insn_idx);
9754 refs_lingering = true;
9756 return refs_lingering ? -EINVAL : 0;
9759 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
9760 struct bpf_reg_state *regs)
9762 struct bpf_reg_state *fmt_reg = &regs[BPF_REG_3];
9763 struct bpf_reg_state *data_len_reg = &regs[BPF_REG_5];
9764 struct bpf_map *fmt_map = fmt_reg->map_ptr;
9765 struct bpf_bprintf_data data = {};
9766 int err, fmt_map_off, num_args;
9770 /* data must be an array of u64 */
9771 if (data_len_reg->var_off.value % 8)
9773 num_args = data_len_reg->var_off.value / 8;
9775 /* fmt being ARG_PTR_TO_CONST_STR guarantees that var_off is const
9776 * and map_direct_value_addr is set.
9778 fmt_map_off = fmt_reg->off + fmt_reg->var_off.value;
9779 err = fmt_map->ops->map_direct_value_addr(fmt_map, &fmt_addr,
9782 verbose(env, "verifier bug\n");
9785 fmt = (char *)(long)fmt_addr + fmt_map_off;
9787 /* We are also guaranteed that fmt+fmt_map_off is NULL terminated, so we
9788 * can focus on validating the format specifiers.
9790 err = bpf_bprintf_prepare(fmt, UINT_MAX, NULL, num_args, &data);
9792 verbose(env, "Invalid format string\n");
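/* Illustrative bpf_snprintf() call that satisfies both the checks above and
 * the ARG_PTR_TO_CONST_STR handling in check_func_arg() (sketch, not from
 * this file): the format string must live in a read-only map (static const
 * data ends up in .rodata) and the data array must consist of u64s:
 *
 *	static const char fmt[] = "pid=%d";
 *	char out[16];
 *	__u64 args[1] = { pid };
 *
 *	bpf_snprintf(out, sizeof(out), fmt, args, sizeof(args));
 */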
9797 static int check_get_func_ip(struct bpf_verifier_env *env)
9799 enum bpf_prog_type type = resolve_prog_type(env->prog);
9800 int func_id = BPF_FUNC_get_func_ip;
9802 if (type == BPF_PROG_TYPE_TRACING) {
9803 if (!bpf_prog_has_trampoline(env->prog)) {
9804 verbose(env, "func %s#%d supported only for fentry/fexit/fmod_ret programs\n",
9805 func_id_name(func_id), func_id);
9809 } else if (type == BPF_PROG_TYPE_KPROBE) {
9813 verbose(env, "func %s#%d not supported for program type %d\n",
9814 func_id_name(func_id), func_id, type);
9818 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
9820 return &env->insn_aux_data[env->insn_idx];
9823 static bool loop_flag_is_zero(struct bpf_verifier_env *env)
9825 struct bpf_reg_state *regs = cur_regs(env);
9826 struct bpf_reg_state *reg = &regs[BPF_REG_4];
9827 bool reg_is_null = register_is_null(reg);
9830 mark_chain_precision(env, BPF_REG_4);
9835 static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
9837 struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
9839 if (!state->initialized) {
9840 state->initialized = 1;
9841 state->fit_for_inline = loop_flag_is_zero(env);
9842 state->callback_subprogno = subprogno;
9846 if (!state->fit_for_inline)
9849 state->fit_for_inline = (loop_flag_is_zero(env) &&
9850 state->callback_subprogno == subprogno);
9853 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
9856 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
9857 const struct bpf_func_proto *fn = NULL;
9858 enum bpf_return_type ret_type;
9859 enum bpf_type_flag ret_flag;
9860 struct bpf_reg_state *regs;
9861 struct bpf_call_arg_meta meta;
9862 int insn_idx = *insn_idx_p;
9864 int i, err, func_id;
9866 /* find function prototype */
9867 func_id = insn->imm;
9868 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
9869 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
9874 if (env->ops->get_func_proto)
9875 fn = env->ops->get_func_proto(func_id, env->prog);
9877 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
9882 /* eBPF programs must be GPL compatible to use GPL-ed functions */
9883 if (!env->prog->gpl_compatible && fn->gpl_only) {
9884 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
9888 if (fn->allowed && !fn->allowed(env->prog)) {
9889 verbose(env, "helper call is not allowed in probe\n");
9893 if (!env->prog->aux->sleepable && fn->might_sleep) {
9894 verbose(env, "helper call might sleep in a non-sleepable prog\n");
9898 /* With LD_ABS/IND some JITs save/restore skb from r1. */
9899 changes_data = bpf_helper_changes_pkt_data(fn->func);
9900 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
9901 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
9902 func_id_name(func_id), func_id);
9906 memset(&meta, 0, sizeof(meta));
9907 meta.pkt_access = fn->pkt_access;
9909 err = check_func_proto(fn, func_id);
9911 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
9912 func_id_name(func_id), func_id);
9916 if (env->cur_state->active_rcu_lock) {
9917 if (fn->might_sleep) {
9918 verbose(env, "sleepable helper %s#%d in rcu_read_lock region\n",
9919 func_id_name(func_id), func_id);
9923 if (env->prog->aux->sleepable && is_storage_get_function(func_id))
9924 env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
9927 meta.func_id = func_id;
9929 for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
9930 err = check_func_arg(env, i, &meta, fn, insn_idx);
9935 err = record_func_map(env, &meta, func_id, insn_idx);
9939 err = record_func_key(env, &meta, func_id, insn_idx);
9943 /* Mark slots with STACK_MISC in case of raw mode, stack offset
9944 * is inferred from register state.
9946 for (i = 0; i < meta.access_size; i++) {
9947 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
9948 BPF_WRITE, -1, false, false);
9953 regs = cur_regs(env);
9955 if (meta.release_regno) {
9957 /* This can only be set for PTR_TO_STACK, as CONST_PTR_TO_DYNPTR cannot
9958 * be released by any dynptr helper. Hence, unmark_stack_slots_dynptr
9959 * is safe to do directly.
9961 if (arg_type_is_dynptr(fn->arg_type[meta.release_regno - BPF_REG_1])) {
9962 if (regs[meta.release_regno].type == CONST_PTR_TO_DYNPTR) {
9963 verbose(env, "verifier internal error: CONST_PTR_TO_DYNPTR cannot be released\n");
9966 err = unmark_stack_slots_dynptr(env, &regs[meta.release_regno]);
9967 } else if (meta.ref_obj_id) {
9968 err = release_reference(env, meta.ref_obj_id);
9969 } else if (register_is_null(&regs[meta.release_regno])) {
9970 /* meta.ref_obj_id can only be 0 if register that is meant to be
9971 * released is NULL, which must be > R0.
9976 verbose(env, "func %s#%d reference has not been acquired before\n",
9977 func_id_name(func_id), func_id);
9983 case BPF_FUNC_tail_call:
9984 err = check_reference_leak(env);
9986 verbose(env, "tail_call would lead to reference leak\n");
9990 case BPF_FUNC_get_local_storage:
9991 /* check that flags argument in get_local_storage(map, flags) is 0,
9992 * this is required because get_local_storage() can't return an error.
9994 if (!register_is_null(&regs[BPF_REG_2])) {
9995 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
9999 case BPF_FUNC_for_each_map_elem:
10000 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
10001 set_map_elem_callback_state);
10003 case BPF_FUNC_timer_set_callback:
10004 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
10005 set_timer_callback_state);
10007 case BPF_FUNC_find_vma:
10008 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
10009 set_find_vma_callback_state);
10011 case BPF_FUNC_snprintf:
10012 err = check_bpf_snprintf_call(env, regs);
10014 case BPF_FUNC_loop:
10015 update_loop_inline_state(env, meta.subprogno);
10016 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
10017 set_loop_callback_state);
10019 case BPF_FUNC_dynptr_from_mem:
10020 if (regs[BPF_REG_1].type != PTR_TO_MAP_VALUE) {
10021 verbose(env, "Unsupported reg type %s for bpf_dynptr_from_mem data\n",
10022 reg_type_str(env, regs[BPF_REG_1].type));
10026 case BPF_FUNC_set_retval:
10027 if (prog_type == BPF_PROG_TYPE_LSM &&
10028 env->prog->expected_attach_type == BPF_LSM_CGROUP) {
10029 if (!env->prog->aux->attach_func_proto->type) {
10030 /* Make sure programs that attach to void
10031 * hooks don't try to modify return value.
10033 verbose(env, "BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
10038 case BPF_FUNC_dynptr_data:
10040 struct bpf_reg_state *reg;
10041 int id, ref_obj_id;
10043 reg = get_dynptr_arg_reg(env, fn, regs);
10048 if (meta.dynptr_id) {
10049 verbose(env, "verifier internal error: meta.dynptr_id already set\n");
10052 if (meta.ref_obj_id) {
10053 verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
10057 id = dynptr_id(env, reg);
10059 verbose(env, "verifier internal error: failed to obtain dynptr id\n");
10063 ref_obj_id = dynptr_ref_obj_id(env, reg);
10064 if (ref_obj_id < 0) {
10065 verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
10069 meta.dynptr_id = id;
10070 meta.ref_obj_id = ref_obj_id;
10074 case BPF_FUNC_dynptr_write:
10076 enum bpf_dynptr_type dynptr_type;
10077 struct bpf_reg_state *reg;
10079 reg = get_dynptr_arg_reg(env, fn, regs);
10083 dynptr_type = dynptr_get_type(env, reg);
10084 if (dynptr_type == BPF_DYNPTR_TYPE_INVALID)
10087 if (dynptr_type == BPF_DYNPTR_TYPE_SKB)
10088 /* this will trigger clear_all_pkt_pointers(), which will
10089 * invalidate all dynptr slices associated with the skb
10091 changes_data = true;
10095 case BPF_FUNC_user_ringbuf_drain:
10096 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
10097 set_user_ringbuf_callback_state);
10104 /* reset caller saved regs */
10105 for (i = 0; i < CALLER_SAVED_REGS; i++) {
10106 mark_reg_not_init(env, regs, caller_saved[i]);
10107 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
10110 /* helper call returns 64-bit value. */
10111 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
10113 /* update return register (already marked as written above) */
10114 ret_type = fn->ret_type;
10115 ret_flag = type_flag(ret_type);
10117 switch (base_type(ret_type)) {
10119 /* sets type to SCALAR_VALUE */
10120 mark_reg_unknown(env, regs, BPF_REG_0);
10123 regs[BPF_REG_0].type = NOT_INIT;
10125 case RET_PTR_TO_MAP_VALUE:
10126 /* There is no offset yet applied, variable or fixed */
10127 mark_reg_known_zero(env, regs, BPF_REG_0);
10128 /* remember map_ptr, so that check_map_access()
10129 * can check 'value_size' boundary of memory access
10130 * to map element returned from bpf_map_lookup_elem()
10132 if (meta.map_ptr == NULL) {
10134 "kernel subsystem misconfigured verifier\n");
10137 regs[BPF_REG_0].map_ptr = meta.map_ptr;
10138 regs[BPF_REG_0].map_uid = meta.map_uid;
10139 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
10140 if (!type_may_be_null(ret_type) &&
10141 btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
10142 regs[BPF_REG_0].id = ++env->id_gen;
10145 case RET_PTR_TO_SOCKET:
10146 mark_reg_known_zero(env, regs, BPF_REG_0);
10147 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
10149 case RET_PTR_TO_SOCK_COMMON:
10150 mark_reg_known_zero(env, regs, BPF_REG_0);
10151 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
10153 case RET_PTR_TO_TCP_SOCK:
10154 mark_reg_known_zero(env, regs, BPF_REG_0);
10155 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
10157 case RET_PTR_TO_MEM:
10158 mark_reg_known_zero(env, regs, BPF_REG_0);
10159 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10160 regs[BPF_REG_0].mem_size = meta.mem_size;
10162 case RET_PTR_TO_MEM_OR_BTF_ID:
10164 const struct btf_type *t;
10166 mark_reg_known_zero(env, regs, BPF_REG_0);
10167 t = btf_type_skip_modifiers(meta.ret_btf, meta.ret_btf_id, NULL);
10168 if (!btf_type_is_struct(t)) {
10170 const struct btf_type *ret;
10173 /* resolve the type size of ksym. */
10174 ret = btf_resolve_size(meta.ret_btf, t, &tsize);
10176 tname = btf_name_by_offset(meta.ret_btf, t->name_off);
10177 verbose(env, "unable to resolve the size of type '%s': %ld\n",
10178 tname, PTR_ERR(ret));
10181 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
10182 regs[BPF_REG_0].mem_size = tsize;
10184 /* MEM_RDONLY may be carried from ret_flag, but it
10185 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
10186 * it will confuse the check of PTR_TO_BTF_ID in
10187 * check_mem_access().
10189 ret_flag &= ~MEM_RDONLY;
10191 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
10192 regs[BPF_REG_0].btf = meta.ret_btf;
10193 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
10197 case RET_PTR_TO_BTF_ID:
10199 struct btf *ret_btf;
10202 mark_reg_known_zero(env, regs, BPF_REG_0);
10203 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
10204 if (func_id == BPF_FUNC_kptr_xchg) {
10205 ret_btf = meta.kptr_field->kptr.btf;
10206 ret_btf_id = meta.kptr_field->kptr.btf_id;
10207 if (!btf_is_kernel(ret_btf))
10208 regs[BPF_REG_0].type |= MEM_ALLOC;
10210 if (fn->ret_btf_id == BPF_PTR_POISON) {
10211 verbose(env, "verifier internal error:");
10212 verbose(env, "func %s has non-overwritten BPF_PTR_POISON return type\n",
10213 func_id_name(func_id));
10216 ret_btf = btf_vmlinux;
10217 ret_btf_id = *fn->ret_btf_id;
10219 if (ret_btf_id == 0) {
10220 verbose(env, "invalid return type %u of func %s#%d\n",
10221 base_type(ret_type), func_id_name(func_id),
10225 regs[BPF_REG_0].btf = ret_btf;
10226 regs[BPF_REG_0].btf_id = ret_btf_id;
10230 verbose(env, "unknown return type %u of func %s#%d\n",
10231 base_type(ret_type), func_id_name(func_id), func_id);
10235 if (type_may_be_null(regs[BPF_REG_0].type))
10236 regs[BPF_REG_0].id = ++env->id_gen;
10238 if (helper_multiple_ref_obj_use(func_id, meta.map_ptr)) {
10239 verbose(env, "verifier internal error: func %s#%d sets ref_obj_id more than once\n",
10240 func_id_name(func_id), func_id);
10244 if (is_dynptr_ref_function(func_id))
10245 regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
10247 if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
10248 /* For release_reference() */
10249 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
10250 } else if (is_acquire_function(func_id, meta.map_ptr)) {
10251 int id = acquire_reference_state(env, insn_idx);
10255 /* For mark_ptr_or_null_reg() */
10256 regs[BPF_REG_0].id = id;
10257 /* For release_reference() */
10258 regs[BPF_REG_0].ref_obj_id = id;
10261 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
10263 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
10267 if ((func_id == BPF_FUNC_get_stack ||
10268 func_id == BPF_FUNC_get_task_stack) &&
10269 !env->prog->has_callchain_buf) {
10270 const char *err_str;
10272 #ifdef CONFIG_PERF_EVENTS
10273 err = get_callchain_buffers(sysctl_perf_event_max_stack);
10274 err_str = "cannot get callchain buffer for func %s#%d\n";
10277 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
10280 verbose(env, err_str, func_id_name(func_id), func_id);
10284 env->prog->has_callchain_buf = true;
10287 if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
10288 env->prog->call_get_stack = true;
10290 if (func_id == BPF_FUNC_get_func_ip) {
10291 if (check_get_func_ip(env))
10293 env->prog->call_get_func_ip = true;
10297 clear_all_pkt_pointers(env);
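/* Illustrative sketch (not part of the verifier itself): the RET_PTR_TO_MAP_VALUE
 * handling above is what makes the usual lookup-then-check pattern verifiable.
 * Assuming an ordinary hash or array map, a program does roughly:
 *
 *	val = bpf_map_lookup_elem(&my_map, &key);
 *		// R0 is PTR_TO_MAP_VALUE_OR_NULL, R0.map_ptr remembers &my_map
 *	if (!val)
 *		return 0;	// non-NULL branch now sees plain PTR_TO_MAP_VALUE
 *	val->counter++;		// bounded against my_map's value_size by check_map_access()
 *
 * "my_map", "key" and "counter" are made-up names for the example only.
 */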
10301 /* mark_btf_func_reg_size() is used when the reg size is determined by
10302 * the BTF func_proto's return value size and argument.
10304 static void mark_btf_func_reg_size(struct bpf_verifier_env *env, u32 regno,
10307 struct bpf_reg_state *reg = &cur_regs(env)[regno];
10309 if (regno == BPF_REG_0) {
10310 /* Function return value */
10311 reg->live |= REG_LIVE_WRITTEN;
10312 reg->subreg_def = reg_size == sizeof(u64) ?
10313 DEF_NOT_SUBREG : env->insn_idx + 1;
10315 /* Function argument */
10316 if (reg_size == sizeof(u64)) {
10317 mark_insn_zext(env, reg);
10318 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
10320 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ32);
10325 static bool is_kfunc_acquire(struct bpf_kfunc_call_arg_meta *meta)
10327 return meta->kfunc_flags & KF_ACQUIRE;
10330 static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
10332 return meta->kfunc_flags & KF_RELEASE;
10335 static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
10337 return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
10340 static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
10342 return meta->kfunc_flags & KF_SLEEPABLE;
10345 static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
10347 return meta->kfunc_flags & KF_DESTRUCTIVE;
10350 static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
10352 return meta->kfunc_flags & KF_RCU;
10355 static bool __kfunc_param_match_suffix(const struct btf *btf,
10356 const struct btf_param *arg,
10357 const char *suffix)
10359 int suffix_len = strlen(suffix), len;
10360 const char *param_name;
10362 /* In the future, this can be ported to use BTF tagging */
10363 param_name = btf_name_by_offset(btf, arg->name_off);
10364 if (str_is_empty(param_name))
10366 len = strlen(param_name);
10367 if (len < suffix_len)
10369 param_name += len - suffix_len;
10370 return !strncmp(param_name, suffix, suffix_len);
10373 static bool is_kfunc_arg_mem_size(const struct btf *btf,
10374 const struct btf_param *arg,
10375 const struct bpf_reg_state *reg)
10377 const struct btf_type *t;
10379 t = btf_type_skip_modifiers(btf, arg->type, NULL);
10380 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10383 return __kfunc_param_match_suffix(btf, arg, "__sz");
10386 static bool is_kfunc_arg_const_mem_size(const struct btf *btf,
10387 const struct btf_param *arg,
10388 const struct bpf_reg_state *reg)
10390 const struct btf_type *t;
10392 t = btf_type_skip_modifiers(btf, arg->type, NULL);
10393 if (!btf_type_is_scalar(t) || reg->type != SCALAR_VALUE)
10396 return __kfunc_param_match_suffix(btf, arg, "__szk");
10399 static bool is_kfunc_arg_optional(const struct btf *btf, const struct btf_param *arg)
10401 return __kfunc_param_match_suffix(btf, arg, "__opt");
10404 static bool is_kfunc_arg_constant(const struct btf *btf, const struct btf_param *arg)
10406 return __kfunc_param_match_suffix(btf, arg, "__k");
10409 static bool is_kfunc_arg_ignore(const struct btf *btf, const struct btf_param *arg)
10411 return __kfunc_param_match_suffix(btf, arg, "__ign");
10414 static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param *arg)
10416 return __kfunc_param_match_suffix(btf, arg, "__alloc");
10419 static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
10421 return __kfunc_param_match_suffix(btf, arg, "__uninit");
10424 static bool is_kfunc_arg_refcounted_kptr(const struct btf *btf, const struct btf_param *arg)
10426 return __kfunc_param_match_suffix(btf, arg, "__refcounted_kptr");
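/* For reference, a sketch of how these name suffixes show up in practice: a kfunc
 * such as bpf_dynptr_slice() is declared roughly as
 *
 *	void *bpf_dynptr_slice(const struct bpf_dynptr_kern *p, u32 offset,
 *			       void *buffer__opt, u32 buffer__szk);
 *
 * i.e. the argument semantics (optional buffer, constant size of the preceding
 * argument) are encoded purely in the parameter names and recovered here by
 * suffix matching against the BTF parameter names.
 */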
10429 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
10430 const struct btf_param *arg,
10433 int len, target_len = strlen(name);
10434 const char *param_name;
10436 param_name = btf_name_by_offset(btf, arg->name_off);
10437 if (str_is_empty(param_name))
10439 len = strlen(param_name);
10440 if (len != target_len)
10442 if (strcmp(param_name, name))
10450 KF_ARG_LIST_HEAD_ID,
10451 KF_ARG_LIST_NODE_ID,
10456 BTF_ID_LIST(kf_arg_btf_ids)
10457 BTF_ID(struct, bpf_dynptr_kern)
10458 BTF_ID(struct, bpf_list_head)
10459 BTF_ID(struct, bpf_list_node)
10460 BTF_ID(struct, bpf_rb_root)
10461 BTF_ID(struct, bpf_rb_node)
10463 static bool __is_kfunc_ptr_arg_type(const struct btf *btf,
10464 const struct btf_param *arg, int type)
10466 const struct btf_type *t;
10469 t = btf_type_skip_modifiers(btf, arg->type, NULL);
10472 if (!btf_type_is_ptr(t))
10474 t = btf_type_skip_modifiers(btf, t->type, &res_id);
10477 return btf_types_are_same(btf, res_id, btf_vmlinux, kf_arg_btf_ids[type]);
10480 static bool is_kfunc_arg_dynptr(const struct btf *btf, const struct btf_param *arg)
10482 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_DYNPTR_ID);
10485 static bool is_kfunc_arg_list_head(const struct btf *btf, const struct btf_param *arg)
10487 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_HEAD_ID);
10490 static bool is_kfunc_arg_list_node(const struct btf *btf, const struct btf_param *arg)
10492 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_LIST_NODE_ID);
10495 static bool is_kfunc_arg_rbtree_root(const struct btf *btf, const struct btf_param *arg)
10497 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_ROOT_ID);
10500 static bool is_kfunc_arg_rbtree_node(const struct btf *btf, const struct btf_param *arg)
10502 return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RB_NODE_ID);
10505 static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
10506 const struct btf_param *arg)
10508 const struct btf_type *t;
10510 t = btf_type_resolve_func_ptr(btf, arg->type, NULL);
10517 /* Returns true if struct is composed of scalars, 4 levels of nesting allowed */
10518 static bool __btf_type_is_scalar_struct(struct bpf_verifier_env *env,
10519 const struct btf *btf,
10520 const struct btf_type *t, int rec)
10522 const struct btf_type *member_type;
10523 const struct btf_member *member;
10526 if (!btf_type_is_struct(t))
10529 for_each_member(i, t, member) {
10530 const struct btf_array *array;
10532 member_type = btf_type_skip_modifiers(btf, member->type, NULL);
10533 if (btf_type_is_struct(member_type)) {
10535 verbose(env, "max struct nesting depth exceeded\n");
10538 if (!__btf_type_is_scalar_struct(env, btf, member_type, rec + 1))
10542 if (btf_type_is_array(member_type)) {
10543 array = btf_array(member_type);
10544 if (!array->nelems)
10546 member_type = btf_type_skip_modifiers(btf, array->type, NULL);
10547 if (!btf_type_is_scalar(member_type))
10551 if (!btf_type_is_scalar(member_type))
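/* Example (illustration only): a kfunc memory argument typed as
 *
 *	struct foo { u32 a; u64 b[4]; struct { u16 c; } inner; };
 *
 * passes the check above (scalars, arrays of scalars, and nested structs of
 * scalars within the depth limit), whereas a struct containing a pointer
 * member or nesting deeper than allowed is rejected.
 */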
10557 enum kfunc_ptr_arg_type {
10559 KF_ARG_PTR_TO_ALLOC_BTF_ID, /* Allocated object */
10560 KF_ARG_PTR_TO_REFCOUNTED_KPTR, /* Refcounted local kptr */
10561 KF_ARG_PTR_TO_DYNPTR,
10562 KF_ARG_PTR_TO_ITER,
10563 KF_ARG_PTR_TO_LIST_HEAD,
10564 KF_ARG_PTR_TO_LIST_NODE,
10565 KF_ARG_PTR_TO_BTF_ID, /* Also covers reg2btf_ids conversions */
10567 KF_ARG_PTR_TO_MEM_SIZE, /* Size derived from next argument, skip it */
10568 KF_ARG_PTR_TO_CALLBACK,
10569 KF_ARG_PTR_TO_RB_ROOT,
10570 KF_ARG_PTR_TO_RB_NODE,
10573 enum special_kfunc_type {
10574 KF_bpf_obj_new_impl,
10575 KF_bpf_obj_drop_impl,
10576 KF_bpf_refcount_acquire_impl,
10577 KF_bpf_list_push_front_impl,
10578 KF_bpf_list_push_back_impl,
10579 KF_bpf_list_pop_front,
10580 KF_bpf_list_pop_back,
10581 KF_bpf_cast_to_kern_ctx,
10582 KF_bpf_rdonly_cast,
10583 KF_bpf_rcu_read_lock,
10584 KF_bpf_rcu_read_unlock,
10585 KF_bpf_rbtree_remove,
10586 KF_bpf_rbtree_add_impl,
10587 KF_bpf_rbtree_first,
10588 KF_bpf_dynptr_from_skb,
10589 KF_bpf_dynptr_from_xdp,
10590 KF_bpf_dynptr_slice,
10591 KF_bpf_dynptr_slice_rdwr,
10592 KF_bpf_dynptr_clone,
10595 BTF_SET_START(special_kfunc_set)
10596 BTF_ID(func, bpf_obj_new_impl)
10597 BTF_ID(func, bpf_obj_drop_impl)
10598 BTF_ID(func, bpf_refcount_acquire_impl)
10599 BTF_ID(func, bpf_list_push_front_impl)
10600 BTF_ID(func, bpf_list_push_back_impl)
10601 BTF_ID(func, bpf_list_pop_front)
10602 BTF_ID(func, bpf_list_pop_back)
10603 BTF_ID(func, bpf_cast_to_kern_ctx)
10604 BTF_ID(func, bpf_rdonly_cast)
10605 BTF_ID(func, bpf_rbtree_remove)
10606 BTF_ID(func, bpf_rbtree_add_impl)
10607 BTF_ID(func, bpf_rbtree_first)
10608 BTF_ID(func, bpf_dynptr_from_skb)
10609 BTF_ID(func, bpf_dynptr_from_xdp)
10610 BTF_ID(func, bpf_dynptr_slice)
10611 BTF_ID(func, bpf_dynptr_slice_rdwr)
10612 BTF_ID(func, bpf_dynptr_clone)
10613 BTF_SET_END(special_kfunc_set)
10615 BTF_ID_LIST(special_kfunc_list)
10616 BTF_ID(func, bpf_obj_new_impl)
10617 BTF_ID(func, bpf_obj_drop_impl)
10618 BTF_ID(func, bpf_refcount_acquire_impl)
10619 BTF_ID(func, bpf_list_push_front_impl)
10620 BTF_ID(func, bpf_list_push_back_impl)
10621 BTF_ID(func, bpf_list_pop_front)
10622 BTF_ID(func, bpf_list_pop_back)
10623 BTF_ID(func, bpf_cast_to_kern_ctx)
10624 BTF_ID(func, bpf_rdonly_cast)
10625 BTF_ID(func, bpf_rcu_read_lock)
10626 BTF_ID(func, bpf_rcu_read_unlock)
10627 BTF_ID(func, bpf_rbtree_remove)
10628 BTF_ID(func, bpf_rbtree_add_impl)
10629 BTF_ID(func, bpf_rbtree_first)
10630 BTF_ID(func, bpf_dynptr_from_skb)
10631 BTF_ID(func, bpf_dynptr_from_xdp)
10632 BTF_ID(func, bpf_dynptr_slice)
10633 BTF_ID(func, bpf_dynptr_slice_rdwr)
10634 BTF_ID(func, bpf_dynptr_clone)
10636 static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
10638 if (meta->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
10639 meta->arg_owning_ref) {
10643 return meta->kfunc_flags & KF_RET_NULL;
10646 static bool is_kfunc_bpf_rcu_read_lock(struct bpf_kfunc_call_arg_meta *meta)
10648 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_lock];
10651 static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
10653 return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
10656 static enum kfunc_ptr_arg_type
10657 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
10658 struct bpf_kfunc_call_arg_meta *meta,
10659 const struct btf_type *t, const struct btf_type *ref_t,
10660 const char *ref_tname, const struct btf_param *args,
10661 int argno, int nargs)
10663 u32 regno = argno + 1;
10664 struct bpf_reg_state *regs = cur_regs(env);
10665 struct bpf_reg_state *reg = &regs[regno];
10666 bool arg_mem_size = false;
10668 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx])
10669 return KF_ARG_PTR_TO_CTX;
10671 /* In this function, we verify the kfunc's BTF as per the argument type,
10672 * leaving the rest of the verification with respect to the register
10673 * type to our caller. When a set of conditions hold in the BTF type of
10674 * arguments, we resolve it to a known kfunc_ptr_arg_type.
10676 if (btf_get_prog_ctx_type(&env->log, meta->btf, t, resolve_prog_type(env->prog), argno))
10677 return KF_ARG_PTR_TO_CTX;
10679 if (is_kfunc_arg_alloc_obj(meta->btf, &args[argno]))
10680 return KF_ARG_PTR_TO_ALLOC_BTF_ID;
10682 if (is_kfunc_arg_refcounted_kptr(meta->btf, &args[argno]))
10683 return KF_ARG_PTR_TO_REFCOUNTED_KPTR;
10685 if (is_kfunc_arg_dynptr(meta->btf, &args[argno]))
10686 return KF_ARG_PTR_TO_DYNPTR;
10688 if (is_kfunc_arg_iter(meta, argno))
10689 return KF_ARG_PTR_TO_ITER;
10691 if (is_kfunc_arg_list_head(meta->btf, &args[argno]))
10692 return KF_ARG_PTR_TO_LIST_HEAD;
10694 if (is_kfunc_arg_list_node(meta->btf, &args[argno]))
10695 return KF_ARG_PTR_TO_LIST_NODE;
10697 if (is_kfunc_arg_rbtree_root(meta->btf, &args[argno]))
10698 return KF_ARG_PTR_TO_RB_ROOT;
10700 if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno]))
10701 return KF_ARG_PTR_TO_RB_NODE;
10703 if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) {
10704 if (!btf_type_is_struct(ref_t)) {
10705 verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n",
10706 meta->func_name, argno, btf_type_str(ref_t), ref_tname);
10709 return KF_ARG_PTR_TO_BTF_ID;
10712 if (is_kfunc_arg_callback(env, meta->btf, &args[argno]))
10713 return KF_ARG_PTR_TO_CALLBACK;
10716 if (argno + 1 < nargs &&
10717 (is_kfunc_arg_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1]) ||
10718 is_kfunc_arg_const_mem_size(meta->btf, &args[argno + 1], &regs[regno + 1])))
10719 arg_mem_size = true;
10721 /* This is the catch all argument type of register types supported by
10722 * check_helper_mem_access. However, we only allow when argument type is
10723 * pointer to scalar, or struct composed (recursively) of scalars. When
10724 * arg_mem_size is true, the pointer can be void *.
10726 if (!btf_type_is_scalar(ref_t) && !__btf_type_is_scalar_struct(env, meta->btf, ref_t, 0) &&
10727 (arg_mem_size ? !btf_type_is_void(ref_t) : 1)) {
10728 verbose(env, "arg#%d pointer type %s %s must point to %sscalar, or struct with scalar\n",
10729 argno, btf_type_str(ref_t), ref_tname, arg_mem_size ? "void, " : "");
10732 return arg_mem_size ? KF_ARG_PTR_TO_MEM_SIZE : KF_ARG_PTR_TO_MEM;
10735 static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
10736 struct bpf_reg_state *reg,
10737 const struct btf_type *ref_t,
10738 const char *ref_tname, u32 ref_id,
10739 struct bpf_kfunc_call_arg_meta *meta,
10742 const struct btf_type *reg_ref_t;
10743 bool strict_type_match = false;
10744 const struct btf *reg_btf;
10745 const char *reg_ref_tname;
10748 if (base_type(reg->type) == PTR_TO_BTF_ID) {
10749 reg_btf = reg->btf;
10750 reg_ref_id = reg->btf_id;
10752 reg_btf = btf_vmlinux;
10753 reg_ref_id = *reg2btf_ids[base_type(reg->type)];
10756 /* Enforce strict type matching for calls to kfuncs that are acquiring
10757 * or releasing a reference, or are no-cast aliases. We do _not_
10758 * enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
10759 * as we want to enable BPF programs to pass types that are bitwise
10760 * equivalent without forcing them to explicitly cast with something
10761 * like bpf_cast_to_kern_ctx().
10763 * For example, say we had a type like the following:
10765 * struct bpf_cpumask {
10766 * cpumask_t cpumask;
10767 * refcount_t usage;
10770 * Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
10771 * to a struct cpumask, so it would be safe to pass a struct
10772 * bpf_cpumask * to a kfunc expecting a struct cpumask *.
10774 * The philosophy here is similar to how we allow scalars of different
10775 * types to be passed to kfuncs as long as the size is the same. The
10776 * only difference here is that we're simply allowing
10777 * btf_struct_ids_match() to walk the struct at the 0th offset, and
10780 if (is_kfunc_acquire(meta) ||
10781 (is_kfunc_release(meta) && reg->ref_obj_id) ||
10782 btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
10783 strict_type_match = true;
10785 WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
10787 reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, &reg_ref_id);
10788 reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
10789 if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
10790 verbose(env, "kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s\n",
10791 meta->func_name, argno, btf_type_str(ref_t), ref_tname, argno + 1,
10792 btf_type_str(reg_ref_t), reg_ref_tname);
10798 static int ref_set_non_owning(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
10800 struct bpf_verifier_state *state = env->cur_state;
10801 struct btf_record *rec = reg_btf_record(reg);
10803 if (!state->active_lock.ptr) {
10804 verbose(env, "verifier internal error: ref_set_non_owning w/o active lock\n");
10808 if (type_flag(reg->type) & NON_OWN_REF) {
10809 verbose(env, "verifier internal error: NON_OWN_REF already set\n");
10813 reg->type |= NON_OWN_REF;
10814 if (rec->refcount_off >= 0)
10815 reg->type |= MEM_RCU;
10820 static int ref_convert_owning_non_owning(struct bpf_verifier_env *env, u32 ref_obj_id)
10822 struct bpf_func_state *state, *unused;
10823 struct bpf_reg_state *reg;
10826 state = cur_func(env);
10829 verbose(env, "verifier internal error: ref_obj_id is zero for "
10830 "owning -> non-owning conversion\n");
10834 for (i = 0; i < state->acquired_refs; i++) {
10835 if (state->refs[i].id != ref_obj_id)
10838 /* Clear ref_obj_id here so release_reference doesn't clobber
10841 bpf_for_each_reg_in_vstate(env->cur_state, unused, reg, ({
10842 if (reg->ref_obj_id == ref_obj_id) {
10843 reg->ref_obj_id = 0;
10844 ref_set_non_owning(env, reg);
10850 verbose(env, "verifier internal error: ref state missing for ref_obj_id\n");
10854 /* Implementation details:
10856 * Each register points to some region of memory, which we define as an
10857 * allocation. Each allocation may embed a bpf_spin_lock which protects any
10858 * special BPF objects (bpf_list_head, bpf_rb_root, etc.) part of the same
10859 * allocation. The lock and the data it protects are colocated in the same
10862 * Hence, every time a register holds a pointer value pointing to such
10863 * allocation, the verifier preserves a unique reg->id for it.
10865 * The verifier remembers the lock 'ptr' and the lock 'id' whenever
10866 * bpf_spin_lock is called.
10868 * To enable this, lock state in the verifier captures two values:
10869 * active_lock.ptr = Register's type specific pointer
10870 * active_lock.id = A unique ID for each register pointer value
10872 * Currently, PTR_TO_MAP_VALUE and PTR_TO_BTF_ID | MEM_ALLOC are the two
10873 * supported register types.
10875 * The active_lock.ptr in case of map values is the reg->map_ptr, and in case of
10876 * allocated objects is the reg->btf pointer.
10878 * The active_lock.id is non-unique for maps supporting direct_value_addr, as we
10879 * can establish the provenance of the map value statically for each distinct
10880 * lookup into such maps. They always contain a single map value, hence assigning
10881 * unique IDs to each pseudo load would pessimize the algorithm and reject valid programs.
10883 * So, in case of global variables, they use array maps with max_entries = 1,
10884 * hence their active_lock.ptr becomes map_ptr and id = 0 (since they all point
10885 * into the same map value as max_entries is 1, as described above).
10887 * In case of inner map lookups, the inner map pointer has same map_ptr as the
10888 * outer map pointer (in verifier context), but each lookup into an inner map
10889 * assigns a fresh reg->id to the lookup, so while lookups into distinct inner
10890 * maps from the same outer map share the same map_ptr as active_lock.ptr, they
10891 * will get different reg->id assigned to each lookup, hence different
10894 * In case of allocated objects, active_lock.ptr is the reg->btf, and the
10895 * reg->id is a unique ID preserved after the NULL pointer check on the pointer
10896 * returned from bpf_obj_new. Each allocation receives a new reg->id.
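 *
 * A minimal sketch of the pattern being tracked (names are illustrative only):
 *
 *	struct elem {
 *		struct bpf_spin_lock lock;
 *		struct bpf_list_head head __contains(node_data, node);
 *	};
 *	...
 *	val = bpf_map_lookup_elem(&array_map, &key);
 *	if (!val)
 *		return 0;
 *	bpf_spin_lock(&val->lock);	// active_lock.ptr = map_ptr, active_lock.id = reg->id
 *	bpf_list_push_front(&val->head, &n->node);
 *	bpf_spin_unlock(&val->lock);
 */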
10898 static int check_reg_allocation_locked(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
10903 switch ((int)reg->type) {
10904 case PTR_TO_MAP_VALUE:
10905 ptr = reg->map_ptr;
10907 case PTR_TO_BTF_ID | MEM_ALLOC:
10911 verbose(env, "verifier internal error: unknown reg type for lock check\n");
10916 if (!env->cur_state->active_lock.ptr)
10918 if (env->cur_state->active_lock.ptr != ptr ||
10919 env->cur_state->active_lock.id != id) {
10920 verbose(env, "held lock and object are not in the same allocation\n");
10926 static bool is_bpf_list_api_kfunc(u32 btf_id)
10928 return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
10929 btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
10930 btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
10931 btf_id == special_kfunc_list[KF_bpf_list_pop_back];
10934 static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
10936 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
10937 btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
10938 btf_id == special_kfunc_list[KF_bpf_rbtree_first];
10941 static bool is_bpf_graph_api_kfunc(u32 btf_id)
10943 return is_bpf_list_api_kfunc(btf_id) || is_bpf_rbtree_api_kfunc(btf_id) ||
10944 btf_id == special_kfunc_list[KF_bpf_refcount_acquire_impl];
10947 static bool is_callback_calling_kfunc(u32 btf_id)
10949 return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl];
10952 static bool is_rbtree_lock_required_kfunc(u32 btf_id)
10954 return is_bpf_rbtree_api_kfunc(btf_id);
10957 static bool check_kfunc_is_graph_root_api(struct bpf_verifier_env *env,
10958 enum btf_field_type head_field_type,
10963 switch (head_field_type) {
10964 case BPF_LIST_HEAD:
10965 ret = is_bpf_list_api_kfunc(kfunc_btf_id);
10968 ret = is_bpf_rbtree_api_kfunc(kfunc_btf_id);
10971 verbose(env, "verifier internal error: unexpected graph root argument type %s\n",
10972 btf_field_type_name(head_field_type));
10977 verbose(env, "verifier internal error: %s head arg for unknown kfunc\n",
10978 btf_field_type_name(head_field_type));
10982 static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
10983 enum btf_field_type node_field_type,
10988 switch (node_field_type) {
10989 case BPF_LIST_NODE:
10990 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
10991 kfunc_btf_id == special_kfunc_list[KF_bpf_list_push_back_impl]);
10994 ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
10995 kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
10998 verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
10999 btf_field_type_name(node_field_type));
11004 verbose(env, "verifier internal error: %s node arg for unknown kfunc\n",
11005 btf_field_type_name(node_field_type));
11010 __process_kf_arg_ptr_to_graph_root(struct bpf_verifier_env *env,
11011 struct bpf_reg_state *reg, u32 regno,
11012 struct bpf_kfunc_call_arg_meta *meta,
11013 enum btf_field_type head_field_type,
11014 struct btf_field **head_field)
11016 const char *head_type_name;
11017 struct btf_field *field;
11018 struct btf_record *rec;
11021 if (meta->btf != btf_vmlinux) {
11022 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
11026 if (!check_kfunc_is_graph_root_api(env, head_field_type, meta->func_id))
11029 head_type_name = btf_field_type_name(head_field_type);
11030 if (!tnum_is_const(reg->var_off)) {
11032 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
11033 regno, head_type_name);
11037 rec = reg_btf_record(reg);
11038 head_off = reg->off + reg->var_off.value;
11039 field = btf_record_find(rec, head_off, head_field_type);
11041 verbose(env, "%s not found at offset=%u\n", head_type_name, head_off);
11045 /* All functions require bpf_list_head to be protected using a bpf_spin_lock */
11046 if (check_reg_allocation_locked(env, reg)) {
11047 verbose(env, "bpf_spin_lock at off=%d must be held for %s\n",
11048 rec->spin_lock_off, head_type_name);
11053 verbose(env, "verifier internal error: repeating %s arg\n", head_type_name);
11056 *head_field = field;
11060 static int process_kf_arg_ptr_to_list_head(struct bpf_verifier_env *env,
11061 struct bpf_reg_state *reg, u32 regno,
11062 struct bpf_kfunc_call_arg_meta *meta)
11064 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_LIST_HEAD,
11065 &meta->arg_list_head.field);
11068 static int process_kf_arg_ptr_to_rbtree_root(struct bpf_verifier_env *env,
11069 struct bpf_reg_state *reg, u32 regno,
11070 struct bpf_kfunc_call_arg_meta *meta)
11072 return __process_kf_arg_ptr_to_graph_root(env, reg, regno, meta, BPF_RB_ROOT,
11073 &meta->arg_rbtree_root.field);
11077 __process_kf_arg_ptr_to_graph_node(struct bpf_verifier_env *env,
11078 struct bpf_reg_state *reg, u32 regno,
11079 struct bpf_kfunc_call_arg_meta *meta,
11080 enum btf_field_type head_field_type,
11081 enum btf_field_type node_field_type,
11082 struct btf_field **node_field)
11084 const char *node_type_name;
11085 const struct btf_type *et, *t;
11086 struct btf_field *field;
11089 if (meta->btf != btf_vmlinux) {
11090 verbose(env, "verifier internal error: unexpected btf mismatch in kfunc call\n");
11094 if (!check_kfunc_is_graph_node_api(env, node_field_type, meta->func_id))
11097 node_type_name = btf_field_type_name(node_field_type);
11098 if (!tnum_is_const(reg->var_off)) {
11100 "R%d doesn't have constant offset. %s has to be at the constant offset\n",
11101 regno, node_type_name);
11105 node_off = reg->off + reg->var_off.value;
11106 field = reg_find_field_offset(reg, node_off, node_field_type);
11107 if (!field || field->offset != node_off) {
11108 verbose(env, "%s not found at offset=%u\n", node_type_name, node_off);
11112 field = *node_field;
11114 et = btf_type_by_id(field->graph_root.btf, field->graph_root.value_btf_id);
11115 t = btf_type_by_id(reg->btf, reg->btf_id);
11116 if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, 0, field->graph_root.btf,
11117 field->graph_root.value_btf_id, true)) {
11118 verbose(env, "operation on %s expects arg#1 %s at offset=%d "
11119 "in struct %s, but arg is at offset=%d in struct %s\n",
11120 btf_field_type_name(head_field_type),
11121 btf_field_type_name(node_field_type),
11122 field->graph_root.node_offset,
11123 btf_name_by_offset(field->graph_root.btf, et->name_off),
11124 node_off, btf_name_by_offset(reg->btf, t->name_off));
11127 meta->arg_btf = reg->btf;
11128 meta->arg_btf_id = reg->btf_id;
11130 if (node_off != field->graph_root.node_offset) {
11131 verbose(env, "arg#1 offset=%d, but expected %s at offset=%d in struct %s\n",
11132 node_off, btf_field_type_name(node_field_type),
11133 field->graph_root.node_offset,
11134 btf_name_by_offset(field->graph_root.btf, et->name_off));
11141 static int process_kf_arg_ptr_to_list_node(struct bpf_verifier_env *env,
11142 struct bpf_reg_state *reg, u32 regno,
11143 struct bpf_kfunc_call_arg_meta *meta)
11145 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
11146 BPF_LIST_HEAD, BPF_LIST_NODE,
11147 &meta->arg_list_head.field);
11150 static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env,
11151 struct bpf_reg_state *reg, u32 regno,
11152 struct bpf_kfunc_call_arg_meta *meta)
11154 return __process_kf_arg_ptr_to_graph_node(env, reg, regno, meta,
11155 BPF_RB_ROOT, BPF_RB_NODE,
11156 &meta->arg_rbtree_root.field);
11159 static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta,
11162 const char *func_name = meta->func_name, *ref_tname;
11163 const struct btf *btf = meta->btf;
11164 const struct btf_param *args;
11165 struct btf_record *rec;
11169 args = (const struct btf_param *)(meta->func_proto + 1);
11170 nargs = btf_type_vlen(meta->func_proto);
11171 if (nargs > MAX_BPF_FUNC_REG_ARGS) {
11172 verbose(env, "Function %s has %d > %d args\n", func_name, nargs,
11173 MAX_BPF_FUNC_REG_ARGS);
11177 /* Check that BTF function arguments match actual types that the
11180 for (i = 0; i < nargs; i++) {
11181 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[i + 1];
11182 const struct btf_type *t, *ref_t, *resolve_ret;
11183 enum bpf_arg_type arg_type = ARG_DONTCARE;
11184 u32 regno = i + 1, ref_id, type_size;
11185 bool is_ret_buf_sz = false;
11188 t = btf_type_skip_modifiers(btf, args[i].type, NULL);
11190 if (is_kfunc_arg_ignore(btf, &args[i]))
11193 if (btf_type_is_scalar(t)) {
11194 if (reg->type != SCALAR_VALUE) {
11195 verbose(env, "R%d is not a scalar\n", regno);
11199 if (is_kfunc_arg_constant(meta->btf, &args[i])) {
11200 if (meta->arg_constant.found) {
11201 verbose(env, "verifier internal error: only one constant argument permitted\n");
11204 if (!tnum_is_const(reg->var_off)) {
11205 verbose(env, "R%d must be a known constant\n", regno);
11208 ret = mark_chain_precision(env, regno);
11211 meta->arg_constant.found = true;
11212 meta->arg_constant.value = reg->var_off.value;
11213 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdonly_buf_size")) {
11214 meta->r0_rdonly = true;
11215 is_ret_buf_sz = true;
11216 } else if (is_kfunc_arg_scalar_with_name(btf, &args[i], "rdwr_buf_size")) {
11217 is_ret_buf_sz = true;
11220 if (is_ret_buf_sz) {
11221 if (meta->r0_size) {
11222 verbose(env, "2 or more rdonly/rdwr_buf_size parameters for kfunc");
11226 if (!tnum_is_const(reg->var_off)) {
11227 verbose(env, "R%d is not a const\n", regno);
11231 meta->r0_size = reg->var_off.value;
11232 ret = mark_chain_precision(env, regno);
11239 if (!btf_type_is_ptr(t)) {
11240 verbose(env, "Unrecognized arg#%d type %s\n", i, btf_type_str(t));
11244 if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
11245 (register_is_null(reg) || type_may_be_null(reg->type))) {
11246 verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
11250 if (reg->ref_obj_id) {
11251 if (is_kfunc_release(meta) && meta->ref_obj_id) {
11252 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
11253 regno, reg->ref_obj_id,
11257 meta->ref_obj_id = reg->ref_obj_id;
11258 if (is_kfunc_release(meta))
11259 meta->release_regno = regno;
11262 ref_t = btf_type_skip_modifiers(btf, t->type, &ref_id);
11263 ref_tname = btf_name_by_offset(btf, ref_t->name_off);
11265 kf_arg_type = get_kfunc_ptr_arg_type(env, meta, t, ref_t, ref_tname, args, i, nargs);
11266 if (kf_arg_type < 0)
11267 return kf_arg_type;
11269 switch (kf_arg_type) {
11270 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
11271 case KF_ARG_PTR_TO_BTF_ID:
11272 if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
11275 if (!is_trusted_reg(reg)) {
11276 if (!is_kfunc_rcu(meta)) {
11277 verbose(env, "R%d must be referenced or trusted\n", regno);
11280 if (!is_rcu_reg(reg)) {
11281 verbose(env, "R%d must be a rcu pointer\n", regno);
11287 case KF_ARG_PTR_TO_CTX:
11288 /* Trusted arguments have the same offset checks as release arguments */
11289 arg_type |= OBJ_RELEASE;
11291 case KF_ARG_PTR_TO_DYNPTR:
11292 case KF_ARG_PTR_TO_ITER:
11293 case KF_ARG_PTR_TO_LIST_HEAD:
11294 case KF_ARG_PTR_TO_LIST_NODE:
11295 case KF_ARG_PTR_TO_RB_ROOT:
11296 case KF_ARG_PTR_TO_RB_NODE:
11297 case KF_ARG_PTR_TO_MEM:
11298 case KF_ARG_PTR_TO_MEM_SIZE:
11299 case KF_ARG_PTR_TO_CALLBACK:
11300 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
11301 /* Trusted by default */
11308 if (is_kfunc_release(meta) && reg->ref_obj_id)
11309 arg_type |= OBJ_RELEASE;
11310 ret = check_func_arg_reg_off(env, reg, regno, arg_type);
11314 switch (kf_arg_type) {
11315 case KF_ARG_PTR_TO_CTX:
11316 if (reg->type != PTR_TO_CTX) {
11317 verbose(env, "arg#%d expected pointer to ctx, but got %s\n", i, btf_type_str(t));
11321 if (meta->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
11322 ret = get_kern_ctx_btf_id(&env->log, resolve_prog_type(env->prog));
11325 meta->ret_btf_id = ret;
11328 case KF_ARG_PTR_TO_ALLOC_BTF_ID:
11329 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11330 verbose(env, "arg#%d expected pointer to allocated object\n", i);
11333 if (!reg->ref_obj_id) {
11334 verbose(env, "allocated object must be referenced\n");
11337 if (meta->btf == btf_vmlinux &&
11338 meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
11339 meta->arg_btf = reg->btf;
11340 meta->arg_btf_id = reg->btf_id;
11343 case KF_ARG_PTR_TO_DYNPTR:
11345 enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
11346 int clone_ref_obj_id = 0;
11348 if (reg->type != PTR_TO_STACK &&
11349 reg->type != CONST_PTR_TO_DYNPTR) {
11350 verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
11354 if (reg->type == CONST_PTR_TO_DYNPTR)
11355 dynptr_arg_type |= MEM_RDONLY;
11357 if (is_kfunc_arg_uninit(btf, &args[i]))
11358 dynptr_arg_type |= MEM_UNINIT;
11360 if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
11361 dynptr_arg_type |= DYNPTR_TYPE_SKB;
11362 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_from_xdp]) {
11363 dynptr_arg_type |= DYNPTR_TYPE_XDP;
11364 } else if (meta->func_id == special_kfunc_list[KF_bpf_dynptr_clone] &&
11365 (dynptr_arg_type & MEM_UNINIT)) {
11366 enum bpf_dynptr_type parent_type = meta->initialized_dynptr.type;
11368 if (parent_type == BPF_DYNPTR_TYPE_INVALID) {
11369 verbose(env, "verifier internal error: no dynptr type for parent of clone\n");
11373 dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
11374 clone_ref_obj_id = meta->initialized_dynptr.ref_obj_id;
11375 if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
11376 verbose(env, "verifier internal error: missing ref obj id for parent of clone\n");
11381 ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id);
11385 if (!(dynptr_arg_type & MEM_UNINIT)) {
11386 int id = dynptr_id(env, reg);
11389 verbose(env, "verifier internal error: failed to obtain dynptr id\n");
11392 meta->initialized_dynptr.id = id;
11393 meta->initialized_dynptr.type = dynptr_get_type(env, reg);
11394 meta->initialized_dynptr.ref_obj_id = dynptr_ref_obj_id(env, reg);
11399 case KF_ARG_PTR_TO_ITER:
11400 ret = process_iter_arg(env, regno, insn_idx, meta);
11404 case KF_ARG_PTR_TO_LIST_HEAD:
11405 if (reg->type != PTR_TO_MAP_VALUE &&
11406 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11407 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
11410 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
11411 verbose(env, "allocated object must be referenced\n");
11414 ret = process_kf_arg_ptr_to_list_head(env, reg, regno, meta);
11418 case KF_ARG_PTR_TO_RB_ROOT:
11419 if (reg->type != PTR_TO_MAP_VALUE &&
11420 reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11421 verbose(env, "arg#%d expected pointer to map value or allocated object\n", i);
11424 if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC) && !reg->ref_obj_id) {
11425 verbose(env, "allocated object must be referenced\n");
11428 ret = process_kf_arg_ptr_to_rbtree_root(env, reg, regno, meta);
11432 case KF_ARG_PTR_TO_LIST_NODE:
11433 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11434 verbose(env, "arg#%d expected pointer to allocated object\n", i);
11437 if (!reg->ref_obj_id) {
11438 verbose(env, "allocated object must be referenced\n");
11441 ret = process_kf_arg_ptr_to_list_node(env, reg, regno, meta);
11445 case KF_ARG_PTR_TO_RB_NODE:
11446 if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
11447 if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
11448 verbose(env, "rbtree_remove node input must be non-owning ref\n");
11451 if (in_rbtree_lock_required_cb(env)) {
11452 verbose(env, "rbtree_remove not allowed in rbtree cb\n");
11456 if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
11457 verbose(env, "arg#%d expected pointer to allocated object\n", i);
11460 if (!reg->ref_obj_id) {
11461 verbose(env, "allocated object must be referenced\n");
11466 ret = process_kf_arg_ptr_to_rbtree_node(env, reg, regno, meta);
11470 case KF_ARG_PTR_TO_BTF_ID:
11471 /* Only base_type is checked, further checks are done here */
11472 if ((base_type(reg->type) != PTR_TO_BTF_ID ||
11473 (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
11474 !reg2btf_ids[base_type(reg->type)]) {
11475 verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
11476 verbose(env, "expected %s or socket\n",
11477 reg_type_str(env, base_type(reg->type) |
11478 (type_flag(reg->type) & BPF_REG_TRUSTED_MODIFIERS)));
11481 ret = process_kf_arg_ptr_to_btf_id(env, reg, ref_t, ref_tname, ref_id, meta, i);
11485 case KF_ARG_PTR_TO_MEM:
11486 resolve_ret = btf_resolve_size(btf, ref_t, &type_size);
11487 if (IS_ERR(resolve_ret)) {
11488 verbose(env, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
11489 i, btf_type_str(ref_t), ref_tname, PTR_ERR(resolve_ret));
11492 ret = check_mem_reg(env, reg, regno, type_size);
11496 case KF_ARG_PTR_TO_MEM_SIZE:
11498 struct bpf_reg_state *buff_reg = &regs[regno];
11499 const struct btf_param *buff_arg = &args[i];
11500 struct bpf_reg_state *size_reg = &regs[regno + 1];
11501 const struct btf_param *size_arg = &args[i + 1];
11503 if (!register_is_null(buff_reg) || !is_kfunc_arg_optional(meta->btf, buff_arg)) {
11504 ret = check_kfunc_mem_size_reg(env, size_reg, regno + 1);
11506 verbose(env, "arg#%d arg#%d memory, len pair leads to invalid memory access\n", i, i + 1);
11511 if (is_kfunc_arg_const_mem_size(meta->btf, size_arg, size_reg)) {
11512 if (meta->arg_constant.found) {
11513 verbose(env, "verifier internal error: only one constant argument permitted\n");
11516 if (!tnum_is_const(size_reg->var_off)) {
11517 verbose(env, "R%d must be a known constant\n", regno + 1);
11520 meta->arg_constant.found = true;
11521 meta->arg_constant.value = size_reg->var_off.value;
11524 /* Skip next '__sz' or '__szk' argument */
11528 case KF_ARG_PTR_TO_CALLBACK:
11529 if (reg->type != PTR_TO_FUNC) {
11530 verbose(env, "arg%d expected pointer to func\n", i);
11533 meta->subprogno = reg->subprogno;
11535 case KF_ARG_PTR_TO_REFCOUNTED_KPTR:
11536 if (!type_is_ptr_alloc_obj(reg->type)) {
11537 verbose(env, "arg#%d is neither owning or non-owning ref\n", i);
11540 if (!type_is_non_owning_ref(reg->type))
11541 meta->arg_owning_ref = true;
11543 rec = reg_btf_record(reg);
11545 verbose(env, "verifier internal error: Couldn't find btf_record\n");
11549 if (rec->refcount_off < 0) {
11550 verbose(env, "arg#%d doesn't point to a type with bpf_refcount field\n", i);
11554 meta->arg_btf = reg->btf;
11555 meta->arg_btf_id = reg->btf_id;
11560 if (is_kfunc_release(meta) && !meta->release_regno) {
11561 verbose(env, "release kernel function %s expects refcounted PTR_TO_BTF_ID\n",
11569 static int fetch_kfunc_meta(struct bpf_verifier_env *env,
11570 struct bpf_insn *insn,
11571 struct bpf_kfunc_call_arg_meta *meta,
11572 const char **kfunc_name)
11574 const struct btf_type *func, *func_proto;
11575 u32 func_id, *kfunc_flags;
11576 const char *func_name;
11577 struct btf *desc_btf;
11580 *kfunc_name = NULL;
11585 desc_btf = find_kfunc_desc_btf(env, insn->off);
11586 if (IS_ERR(desc_btf))
11587 return PTR_ERR(desc_btf);
11589 func_id = insn->imm;
11590 func = btf_type_by_id(desc_btf, func_id);
11591 func_name = btf_name_by_offset(desc_btf, func->name_off);
11593 *kfunc_name = func_name;
11594 func_proto = btf_type_by_id(desc_btf, func->type);
11596 kfunc_flags = btf_kfunc_id_set_contains(desc_btf, func_id, env->prog);
11597 if (!kfunc_flags) {
11601 memset(meta, 0, sizeof(*meta));
11602 meta->btf = desc_btf;
11603 meta->func_id = func_id;
11604 meta->kfunc_flags = *kfunc_flags;
11605 meta->func_proto = func_proto;
11606 meta->func_name = func_name;
11611 static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
11614 const struct btf_type *t, *ptr_type;
11615 u32 i, nargs, ptr_type_id, release_ref_obj_id;
11616 struct bpf_reg_state *regs = cur_regs(env);
11617 const char *func_name, *ptr_type_name;
11618 bool sleepable, rcu_lock, rcu_unlock;
11619 struct bpf_kfunc_call_arg_meta meta;
11620 struct bpf_insn_aux_data *insn_aux;
11621 int err, insn_idx = *insn_idx_p;
11622 const struct btf_param *args;
11623 const struct btf_type *ret_t;
11624 struct btf *desc_btf;
11626 /* skip for now, but return error when we find this in fixup_kfunc_call */
11630 err = fetch_kfunc_meta(env, insn, &meta, &func_name);
11631 if (err == -EACCES && func_name)
11632 verbose(env, "calling kernel function %s is not allowed\n", func_name);
11635 desc_btf = meta.btf;
11636 insn_aux = &env->insn_aux_data[insn_idx];
11638 insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
11640 if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
11641 verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
11645 sleepable = is_kfunc_sleepable(&meta);
11646 if (sleepable && !env->prog->aux->sleepable) {
11647 verbose(env, "program must be sleepable to call sleepable kfunc %s\n", func_name);
11651 rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
11652 rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);
11654 if (env->cur_state->active_rcu_lock) {
11655 struct bpf_func_state *state;
11656 struct bpf_reg_state *reg;
11658 if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
11659 verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
11664 verbose(env, "nested rcu read lock (kernel function %s)\n", func_name);
11666 } else if (rcu_unlock) {
11667 bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
11668 if (reg->type & MEM_RCU) {
11669 reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
11670 reg->type |= PTR_UNTRUSTED;
11673 env->cur_state->active_rcu_lock = false;
11674 } else if (sleepable) {
11675 verbose(env, "kernel func %s is sleepable within rcu_read_lock region\n", func_name);
11678 } else if (rcu_lock) {
11679 env->cur_state->active_rcu_lock = true;
11680 } else if (rcu_unlock) {
11681 verbose(env, "unmatched rcu read unlock (kernel function %s)\n", func_name);
11685 /* Check the arguments */
11686 err = check_kfunc_args(env, &meta, insn_idx);
11689 /* In case of release function, we get register number of refcounted
11690 * PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
11692 if (meta.release_regno) {
11693 err = release_reference(env, regs[meta.release_regno].ref_obj_id);
11695 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
11696 func_name, meta.func_id);
11701 if (meta.func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
11702 meta.func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
11703 meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
11704 release_ref_obj_id = regs[BPF_REG_2].ref_obj_id;
11705 insn_aux->insert_off = regs[BPF_REG_2].off;
11706 insn_aux->kptr_struct_meta = btf_find_struct_meta(meta.arg_btf, meta.arg_btf_id);
11707 err = ref_convert_owning_non_owning(env, release_ref_obj_id);
11709 verbose(env, "kfunc %s#%d conversion of owning ref to non-owning failed\n",
11710 func_name, meta.func_id);
11714 err = release_reference(env, release_ref_obj_id);
11716 verbose(env, "kfunc %s#%d reference has not been acquired before\n",
11717 func_name, meta.func_id);
11722 if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
11723 err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
11724 set_rbtree_add_callback_state);
11726 verbose(env, "kfunc %s#%d failed callback verification\n",
11727 func_name, meta.func_id);
11732 for (i = 0; i < CALLER_SAVED_REGS; i++)
11733 mark_reg_not_init(env, regs, caller_saved[i]);
11735 /* Check return type */
11736 t = btf_type_skip_modifiers(desc_btf, meta.func_proto->type, NULL);
11738 if (is_kfunc_acquire(&meta) && !btf_type_is_struct_ptr(meta.btf, t)) {
11739 /* Only exception is bpf_obj_new_impl */
11740 if (meta.btf != btf_vmlinux ||
11741 (meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
11742 meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
11743 verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
11748 if (btf_type_is_scalar(t)) {
11749 mark_reg_unknown(env, regs, BPF_REG_0);
11750 mark_btf_func_reg_size(env, BPF_REG_0, t->size);
11751 } else if (btf_type_is_ptr(t)) {
11752 ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
11754 if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
11755 if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
11756 struct btf *ret_btf;
11759 if (unlikely(!bpf_global_ma_set))
11762 if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
11763 verbose(env, "local type ID argument must be in range [0, U32_MAX]\n");
11767 ret_btf = env->prog->aux->btf;
11768 ret_btf_id = meta.arg_constant.value;
11770 /* This may be NULL due to user not supplying a BTF */
11772 verbose(env, "bpf_obj_new requires prog BTF\n");
11776 ret_t = btf_type_by_id(ret_btf, ret_btf_id);
11777 if (!ret_t || !__btf_type_is_struct(ret_t)) {
11778 verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
11782 mark_reg_known_zero(env, regs, BPF_REG_0);
11783 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
11784 regs[BPF_REG_0].btf = ret_btf;
11785 regs[BPF_REG_0].btf_id = ret_btf_id;
11787 insn_aux->obj_new_size = ret_t->size;
11788 insn_aux->kptr_struct_meta =
11789 btf_find_struct_meta(ret_btf, ret_btf_id);
11790 } else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
11791 mark_reg_known_zero(env, regs, BPF_REG_0);
11792 regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
11793 regs[BPF_REG_0].btf = meta.arg_btf;
11794 regs[BPF_REG_0].btf_id = meta.arg_btf_id;
11796 insn_aux->kptr_struct_meta =
11797 btf_find_struct_meta(meta.arg_btf,
11799 } else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
11800 meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
11801 struct btf_field *field = meta.arg_list_head.field;
11803 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
11804 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
11805 meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
11806 struct btf_field *field = meta.arg_rbtree_root.field;
11808 mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
11809 } else if (meta.func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx]) {
11810 mark_reg_known_zero(env, regs, BPF_REG_0);
11811 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_TRUSTED;
11812 regs[BPF_REG_0].btf = desc_btf;
11813 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
11814 } else if (meta.func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
11815 ret_t = btf_type_by_id(desc_btf, meta.arg_constant.value);
11816 if (!ret_t || !btf_type_is_struct(ret_t)) {
11818 "kfunc bpf_rdonly_cast type ID argument must be of a struct\n");
11822 mark_reg_known_zero(env, regs, BPF_REG_0);
11823 regs[BPF_REG_0].type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
11824 regs[BPF_REG_0].btf = desc_btf;
11825 regs[BPF_REG_0].btf_id = meta.arg_constant.value;
11826 } else if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice] ||
11827 meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice_rdwr]) {
11828 enum bpf_type_flag type_flag = get_dynptr_type_flag(meta.initialized_dynptr.type);
11830 mark_reg_known_zero(env, regs, BPF_REG_0);
11832 if (!meta.arg_constant.found) {
11833 verbose(env, "verifier internal error: bpf_dynptr_slice(_rdwr) no constant size\n");
11837 regs[BPF_REG_0].mem_size = meta.arg_constant.value;
11839 /* PTR_MAYBE_NULL will be added when is_kfunc_ret_null is checked */
11840 regs[BPF_REG_0].type = PTR_TO_MEM | type_flag;
11842 if (meta.func_id == special_kfunc_list[KF_bpf_dynptr_slice]) {
11843 regs[BPF_REG_0].type |= MEM_RDONLY;
11845 /* this will set env->seen_direct_write to true */
11846 if (!may_access_direct_pkt_data(env, NULL, BPF_WRITE)) {
11847 verbose(env, "the prog does not allow writes to packet data\n");
11852 if (!meta.initialized_dynptr.id) {
11853 verbose(env, "verifier internal error: no dynptr id\n");
11856 regs[BPF_REG_0].dynptr_id = meta.initialized_dynptr.id;
11858 /* we don't need to set BPF_REG_0's ref obj id
11859 * because packet slices are not refcounted (see
11860 * dynptr_type_refcounted)
11863 verbose(env, "kernel function %s unhandled dynamic return type\n",
11867 } else if (!__btf_type_is_struct(ptr_type)) {
11868 if (!meta.r0_size) {
11871 if (!IS_ERR(btf_resolve_size(desc_btf, ptr_type, &sz))) {
11873 meta.r0_rdonly = true;
11876 if (!meta.r0_size) {
11877 ptr_type_name = btf_name_by_offset(desc_btf,
11878 ptr_type->name_off);
11880 "kernel function %s returns pointer type %s %s is not supported\n",
11882 btf_type_str(ptr_type),
11887 mark_reg_known_zero(env, regs, BPF_REG_0);
11888 regs[BPF_REG_0].type = PTR_TO_MEM;
11889 regs[BPF_REG_0].mem_size = meta.r0_size;
11891 if (meta.r0_rdonly)
11892 regs[BPF_REG_0].type |= MEM_RDONLY;
11894 /* Ensures we don't access the memory after a release_reference() */
11895 if (meta.ref_obj_id)
11896 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
11898 mark_reg_known_zero(env, regs, BPF_REG_0);
11899 regs[BPF_REG_0].btf = desc_btf;
11900 regs[BPF_REG_0].type = PTR_TO_BTF_ID;
11901 regs[BPF_REG_0].btf_id = ptr_type_id;
11904 if (is_kfunc_ret_null(&meta)) {
11905 regs[BPF_REG_0].type |= PTR_MAYBE_NULL;
11906 /* For mark_ptr_or_null_reg, see 93c230e3f5bd6 */
11907 regs[BPF_REG_0].id = ++env->id_gen;
11909 mark_btf_func_reg_size(env, BPF_REG_0, sizeof(void *));
11910 if (is_kfunc_acquire(&meta)) {
11911 int id = acquire_reference_state(env, insn_idx);
11915 if (is_kfunc_ret_null(&meta))
11916 regs[BPF_REG_0].id = id;
11917 regs[BPF_REG_0].ref_obj_id = id;
11918 } else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
11919 ref_set_non_owning(env, &regs[BPF_REG_0]);
11922 if (reg_may_point_to_spin_lock(&regs[BPF_REG_0]) && !regs[BPF_REG_0].id)
11923 regs[BPF_REG_0].id = ++env->id_gen;
11924 } else if (btf_type_is_void(t)) {
11925 if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
11926 if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
11927 insn_aux->kptr_struct_meta =
11928 btf_find_struct_meta(meta.arg_btf,
11934 nargs = btf_type_vlen(meta.func_proto);
11935 args = (const struct btf_param *)(meta.func_proto + 1);
11936 for (i = 0; i < nargs; i++) {
11939 t = btf_type_skip_modifiers(desc_btf, args[i].type, NULL);
11940 if (btf_type_is_ptr(t))
11941 mark_btf_func_reg_size(env, regno, sizeof(void *));
11943 /* scalar. ensured by btf_check_kfunc_arg_match() */
11944 mark_btf_func_reg_size(env, regno, t->size);
11947 if (is_iter_next_kfunc(&meta)) {
11948 err = process_iter_next_call(env, insn_idx, &meta);
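/* Taken together with check_kfunc_args() above, a typical acquire/release kfunc
 * pair verified by this path looks roughly like the following program fragment
 * (assuming a program-local "struct foo" described by the prog's BTF):
 *
 *	struct foo *f = bpf_obj_new(typeof(*f));
 *		// R0: PTR_TO_BTF_ID | MEM_ALLOC | PTR_MAYBE_NULL, ref_obj_id acquired
 *	if (!f)
 *		return 0;
 *	...
 *	bpf_obj_drop(f);	// release_regno handling drops the acquired reference
 *
 * This only illustrates the state transitions; the bpf_obj_new()/bpf_obj_drop()
 * macros wrap the *_impl kfuncs handled above.
 */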
11956 static bool signed_add_overflows(s64 a, s64 b)
11958 /* Do the add in u64, where overflow is well-defined */
11959 s64 res = (s64)((u64)a + (u64)b);
11966 static bool signed_add32_overflows(s32 a, s32 b)
11968 /* Do the add in u32, where overflow is well-defined */
11969 s32 res = (s32)((u32)a + (u32)b);
11976 static bool signed_sub_overflows(s64 a, s64 b)
11978 /* Do the sub in u64, where overflow is well-defined */
11979 s64 res = (s64)((u64)a - (u64)b);
11986 static bool signed_sub32_overflows(s32 a, s32 b)
11988 /* Do the sub in u32, where overflow is well-defined */
11989 s32 res = (s32)((u32)a - (u32)b);
11996 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
11997 const struct bpf_reg_state *reg,
11998 enum bpf_reg_type type)
12000 bool known = tnum_is_const(reg->var_off);
12001 s64 val = reg->var_off.value;
12002 s64 smin = reg->smin_value;
12004 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
12005 verbose(env, "math between %s pointer and %lld is not allowed\n",
12006 reg_type_str(env, type), val);
12010 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
12011 verbose(env, "%s pointer offset %d is not allowed\n",
12012 reg_type_str(env, type), reg->off);
12016 if (smin == S64_MIN) {
12017 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
12018 reg_type_str(env, type));
12022 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
12023 verbose(env, "value %lld makes %s pointer be out of bounds\n",
12024 smin, reg_type_str(env, type));
12032 REASON_BOUNDS = -1,
12039 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
12040 u32 *alu_limit, bool mask_to_left)
12042 u32 max = 0, ptr_limit = 0;
12044 switch (ptr_reg->type) {
12046 /* Offset 0 is out-of-bounds, but acceptable start for the
12047 * left direction, see BPF_REG_FP. Also, unknown scalar
12048 * offset where we would need to deal with min/max bounds is
12049 * currently prohibited for unprivileged.
12051 max = MAX_BPF_STACK + mask_to_left;
12052 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
12054 case PTR_TO_MAP_VALUE:
12055 max = ptr_reg->map_ptr->value_size;
12056 ptr_limit = (mask_to_left ?
12057 ptr_reg->smin_value :
12058 ptr_reg->umax_value) + ptr_reg->off;
12061 return REASON_TYPE;
12064 if (ptr_limit >= max)
12065 return REASON_LIMIT;
12066 *alu_limit = ptr_limit;
12070 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
12071 const struct bpf_insn *insn)
12073 return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
12076 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
12077 u32 alu_state, u32 alu_limit)
12079 /* If we arrived here from different branches with different
12080 * state or limits to sanitize, then this won't work.
12082 if (aux->alu_state &&
12083 (aux->alu_state != alu_state ||
12084 aux->alu_limit != alu_limit))
12085 return REASON_PATHS;
12087 /* Corresponding fixup done in do_misc_fixups(). */
12088 aux->alu_state = alu_state;
12089 aux->alu_limit = alu_limit;
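/* For context (a sketch; see do_misc_fixups() for the authoritative version):
 * the alu_state/alu_limit recorded by update_alu_sanitation_state() are later
 * used to rewrite the pointer ALU instruction into a masked sequence roughly
 * of the form
 *
 *	BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
 *	BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
 *	BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
 *	BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
 *	BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
 *	BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
 *
 * which zeroes the offset whenever it exceeds alu_limit, so a speculatively
 * executed out-of-bounds pointer computation cannot escape the verified object.
 */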
12093 static int sanitize_val_alu(struct bpf_verifier_env *env,
12094 struct bpf_insn *insn)
12096 struct bpf_insn_aux_data *aux = cur_aux(env);
12098 if (can_skip_alu_sanitation(env, insn))
12101 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
12104 static bool sanitize_needed(u8 opcode)
12106 return opcode == BPF_ADD || opcode == BPF_SUB;
12109 struct bpf_sanitize_info {
12110 struct bpf_insn_aux_data aux;
12114 static struct bpf_verifier_state *
12115 sanitize_speculative_path(struct bpf_verifier_env *env,
12116 const struct bpf_insn *insn,
12117 u32 next_idx, u32 curr_idx)
12119 struct bpf_verifier_state *branch;
12120 struct bpf_reg_state *regs;
12122 branch = push_stack(env, next_idx, curr_idx, true);
12123 if (branch && insn) {
12124 regs = branch->frame[branch->curframe]->regs;
12125 if (BPF_SRC(insn->code) == BPF_K) {
12126 mark_reg_unknown(env, regs, insn->dst_reg);
12127 } else if (BPF_SRC(insn->code) == BPF_X) {
12128 mark_reg_unknown(env, regs, insn->dst_reg);
12129 mark_reg_unknown(env, regs, insn->src_reg);
12135 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
12136 struct bpf_insn *insn,
12137 const struct bpf_reg_state *ptr_reg,
12138 const struct bpf_reg_state *off_reg,
12139 struct bpf_reg_state *dst_reg,
12140 struct bpf_sanitize_info *info,
12141 const bool commit_window)
12143 struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
12144 struct bpf_verifier_state *vstate = env->cur_state;
12145 bool off_is_imm = tnum_is_const(off_reg->var_off);
12146 bool off_is_neg = off_reg->smin_value < 0;
12147 bool ptr_is_dst_reg = ptr_reg == dst_reg;
12148 u8 opcode = BPF_OP(insn->code);
12149 u32 alu_state, alu_limit;
12150 struct bpf_reg_state tmp;
12154 if (can_skip_alu_sanitation(env, insn))
12157 /* We already marked aux for masking from non-speculative
12158 * paths, thus we got here in the first place. We only care
12159 * to explore bad access from here.
12161 if (vstate->speculative)
12164 if (!commit_window) {
12165 if (!tnum_is_const(off_reg->var_off) &&
12166 (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
12167 return REASON_BOUNDS;
12169 info->mask_to_left = (opcode == BPF_ADD && off_is_neg) ||
12170 (opcode == BPF_SUB && !off_is_neg);
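/* For example: "ptr += scalar" with a possibly negative scalar, or
 * "ptr -= scalar" with a nonnegative scalar, both move the pointer towards
 * lower addresses, so the speculative masking has to be applied to the left.
 */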
12173 err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
12177 if (commit_window) {
12178 /* In commit phase we narrow the masking window based on
12179 * the observed pointer move after the simulated operation.
12181 alu_state = info->aux.alu_state;
12182 alu_limit = abs(info->aux.alu_limit - alu_limit);
12184 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
12185 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
12186 alu_state |= ptr_is_dst_reg ?
12187 BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
12189 /* Limit pruning on unknown scalars to enable deep search for
12190 * potential masking differences from other program paths.
12193 env->explore_alu_limits = true;
12196 err = update_alu_sanitation_state(aux, alu_state, alu_limit);
12200 /* If we're in commit phase, we're done here given we already
12201 * pushed the truncated dst_reg into the speculative verification
12204 * Also, when register is a known constant, we rewrite register-based
12205 * operation to immediate-based, and thus do not need masking (and as
12206 * a consequence, do not need to simulate the zero-truncation either).
12208 if (commit_window || off_is_imm)
12211 /* Simulate and find potential out-of-bounds access under
12212 * speculative execution from truncation as a result of
12213 * masking when off was not within expected range. If off
12214 * sits in dst, then we temporarily need to move ptr there
12215 * to simulate dst (== 0) +/-= ptr. Needed, for example,
12216 * for cases where we use K-based arithmetic in one direction
12217 * and truncated reg-based in the other in order to explore
12220 if (!ptr_is_dst_reg) {
12222 copy_register_state(dst_reg, ptr_reg);
12224 ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
12226 if (!ptr_is_dst_reg && ret)
12228 return !ret ? REASON_STACK : 0;
12231 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
12233 struct bpf_verifier_state *vstate = env->cur_state;
12235 /* If we simulate paths under speculation, we don't update the
12236 * insn as 'seen' such that when we verify unreachable paths in
12237 * the non-speculative domain, sanitize_dead_code() can still
12238 * rewrite/sanitize them.
12240 if (!vstate->speculative)
12241 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
12244 static int sanitize_err(struct bpf_verifier_env *env,
12245 const struct bpf_insn *insn, int reason,
12246 const struct bpf_reg_state *off_reg,
12247 const struct bpf_reg_state *dst_reg)
12249 static const char *err = "pointer arithmetic with it prohibited for !root";
12250 const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
12251 u32 dst = insn->dst_reg, src = insn->src_reg;
12254 case REASON_BOUNDS:
12255 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
12256 off_reg == dst_reg ? dst : src, err);
12259 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
12260 off_reg == dst_reg ? src : dst, err);
12263 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
12267 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
12271 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
12275 verbose(env, "verifier internal error: unknown reason (%d)\n",
12283 /* check that stack access falls within stack limits and that 'reg' doesn't
12284 * have a variable offset.
12286 * Variable offset is prohibited for unprivileged mode for simplicity since it
12287 * requires corresponding support in Spectre masking for stack ALU. See also
12288 * retrieve_ptr_limit().
12291 * 'off' includes 'reg->off'.
12293 static int check_stack_access_for_ptr_arithmetic(
12294 struct bpf_verifier_env *env,
12296 const struct bpf_reg_state *reg,
12299 if (!tnum_is_const(reg->var_off)) {
12302 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
12303 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
12304 regno, tn_buf, off);
12308 if (off >= 0 || off < -MAX_BPF_STACK) {
12309 verbose(env, "R%d stack pointer arithmetic goes out of range, "
12310 "prohibited for !root; off=%d\n", regno, off);
12317 static int sanitize_check_bounds(struct bpf_verifier_env *env,
12318 const struct bpf_insn *insn,
12319 const struct bpf_reg_state *dst_reg)
12321 u32 dst = insn->dst_reg;
12323 /* For unprivileged we require that resulting offset must be in bounds
12324 * in order to be able to sanitize access later on.
12326 if (env->bypass_spec_v1)
12329 switch (dst_reg->type) {
12331 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
12332 dst_reg->off + dst_reg->var_off.value))
12335 case PTR_TO_MAP_VALUE:
12336 if (check_map_access(env, dst, dst_reg->off, 1, false, ACCESS_HELPER)) {
12337 verbose(env, "R%d pointer arithmetic of map value goes out of range, "
12338 "prohibited for !root\n", dst);
12349 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
12350 * Caller should also handle BPF_MOV case separately.
12351 * If we return -EACCES, caller may want to try again treating pointer as a
12352 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
12354 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
12355 struct bpf_insn *insn,
12356 const struct bpf_reg_state *ptr_reg,
12357 const struct bpf_reg_state *off_reg)
12359 struct bpf_verifier_state *vstate = env->cur_state;
12360 struct bpf_func_state *state = vstate->frame[vstate->curframe];
12361 struct bpf_reg_state *regs = state->regs, *dst_reg;
12362 bool known = tnum_is_const(off_reg->var_off);
12363 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
12364 smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
12365 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
12366 umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
12367 struct bpf_sanitize_info info = {};
12368 u8 opcode = BPF_OP(insn->code);
12369 u32 dst = insn->dst_reg;
12372 dst_reg = &regs[dst];
12374 if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
12375 smin_val > smax_val || umin_val > umax_val) {
12376 /* Taint dst register if offset had invalid bounds derived from
12377 * e.g. dead branches.
12379 __mark_reg_unknown(env, dst_reg);
12383 if (BPF_CLASS(insn->code) != BPF_ALU64) {
12384 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
12385 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
12386 __mark_reg_unknown(env, dst_reg);
12391 "R%d 32-bit pointer arithmetic prohibited\n",
12396 if (ptr_reg->type & PTR_MAYBE_NULL) {
12397 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
12398 dst, reg_type_str(env, ptr_reg->type));
12402 switch (base_type(ptr_reg->type)) {
12403 case PTR_TO_FLOW_KEYS:
12407 case CONST_PTR_TO_MAP:
12408 /* smin_val represents the known value */
12409 if (known && smin_val == 0 && opcode == BPF_ADD)
12412 case PTR_TO_PACKET_END:
12413 case PTR_TO_SOCKET:
12414 case PTR_TO_SOCK_COMMON:
12415 case PTR_TO_TCP_SOCK:
12416 case PTR_TO_XDP_SOCK:
12417 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
12418 dst, reg_type_str(env, ptr_reg->type));
12424 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
12425 * The id may be overwritten later if we create a new variable offset.
12427 dst_reg->type = ptr_reg->type;
12428 dst_reg->id = ptr_reg->id;
12430 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
12431 !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
12434 /* pointer types do not carry 32-bit bounds at the moment. */
12435 __mark_reg32_unbounded(dst_reg);
12437 if (sanitize_needed(opcode)) {
12438 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
12441 return sanitize_err(env, insn, ret, off_reg, dst_reg);
12446 /* We can take a fixed offset as long as it doesn't overflow
12447 * the s32 'off' field
12449 if (known && (ptr_reg->off + smin_val ==
12450 (s64)(s32)(ptr_reg->off + smin_val))) {
12451 /* pointer += K. Accumulate it into fixed offset */
12452 dst_reg->smin_value = smin_ptr;
12453 dst_reg->smax_value = smax_ptr;
12454 dst_reg->umin_value = umin_ptr;
12455 dst_reg->umax_value = umax_ptr;
12456 dst_reg->var_off = ptr_reg->var_off;
12457 dst_reg->off = ptr_reg->off + smin_val;
12458 dst_reg->raw = ptr_reg->raw;
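/* For example: a PTR_TO_MAP_VALUE with off == 16 plus a known constant 8
 * simply becomes the same pointer with off == 24; var_off and the
 * signed/unsigned bounds are carried over unchanged.
 */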
12461 /* A new variable offset is created. Note that off_reg->off
12462 * == 0, since it's a scalar.
12463 * dst_reg gets the pointer type and since some positive
12464 * integer value was added to the pointer, give it a new 'id'
12465 * if it's a PTR_TO_PACKET.
12466 * This creates a new 'base' pointer, off_reg (variable) gets
12467 * added into the variable offset, and we copy the fixed offset
12470 if (signed_add_overflows(smin_ptr, smin_val) ||
12471 signed_add_overflows(smax_ptr, smax_val)) {
12472 dst_reg->smin_value = S64_MIN;
12473 dst_reg->smax_value = S64_MAX;
12475 dst_reg->smin_value = smin_ptr + smin_val;
12476 dst_reg->smax_value = smax_ptr + smax_val;
12478 if (umin_ptr + umin_val < umin_ptr ||
12479 umax_ptr + umax_val < umax_ptr) {
12480 dst_reg->umin_value = 0;
12481 dst_reg->umax_value = U64_MAX;
12483 dst_reg->umin_value = umin_ptr + umin_val;
12484 dst_reg->umax_value = umax_ptr + umax_val;
12486 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
12487 dst_reg->off = ptr_reg->off;
12488 dst_reg->raw = ptr_reg->raw;
12489 if (reg_is_pkt_pointer(ptr_reg)) {
12490 dst_reg->id = ++env->id_gen;
12491 /* something was added to pkt_ptr, set range to zero */
12492 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
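/* For example: a pointer with no prior variable offset (all scalar bounds
 * zero) plus a scalar in [0, 100] ends up with smin/umin == 0 and
 * smax/umax == 100, var_off widened via tnum_add(); a packet pointer
 * additionally gets a fresh id and its range reset to zero.
 */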
12496 if (dst_reg == off_reg) {
12497 /* scalar -= pointer. Creates an unknown scalar */
12498 verbose(env, "R%d tried to subtract pointer from scalar\n",
12502 /* We don't allow subtraction from FP, because (according to
12503 * test_verifier.c test "invalid fp arithmetic", JITs might not
12504 * be able to deal with it.
12506 if (ptr_reg->type == PTR_TO_STACK) {
12507 verbose(env, "R%d subtraction from stack pointer prohibited\n",
12511 if (known && (ptr_reg->off - smin_val ==
12512 (s64)(s32)(ptr_reg->off - smin_val))) {
12513 /* pointer -= K. Subtract it from fixed offset */
12514 dst_reg->smin_value = smin_ptr;
12515 dst_reg->smax_value = smax_ptr;
12516 dst_reg->umin_value = umin_ptr;
12517 dst_reg->umax_value = umax_ptr;
12518 dst_reg->var_off = ptr_reg->var_off;
12519 dst_reg->id = ptr_reg->id;
12520 dst_reg->off = ptr_reg->off - smin_val;
12521 dst_reg->raw = ptr_reg->raw;
12524 /* A new variable offset is created. If the subtrahend is known
12525 * nonnegative, then any reg->range we had before is still good.
12527 if (signed_sub_overflows(smin_ptr, smax_val) ||
12528 signed_sub_overflows(smax_ptr, smin_val)) {
12529 /* Overflow possible, we know nothing */
12530 dst_reg->smin_value = S64_MIN;
12531 dst_reg->smax_value = S64_MAX;
12533 dst_reg->smin_value = smin_ptr - smax_val;
12534 dst_reg->smax_value = smax_ptr - smin_val;
12536 if (umin_ptr < umax_val) {
12537 /* Overflow possible, we know nothing */
12538 dst_reg->umin_value = 0;
12539 dst_reg->umax_value = U64_MAX;
12541 /* Cannot overflow (as long as bounds are consistent) */
12542 dst_reg->umin_value = umin_ptr - umax_val;
12543 dst_reg->umax_value = umax_ptr - umin_val;
12545 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
12546 dst_reg->off = ptr_reg->off;
12547 dst_reg->raw = ptr_reg->raw;
12548 if (reg_is_pkt_pointer(ptr_reg)) {
12549 dst_reg->id = ++env->id_gen;
12550 /* something was subtracted from pkt_ptr, set range to zero */
12552 memset(&dst_reg->raw, 0, sizeof(dst_reg->raw));
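/* For example: a pointer with no prior variable offset minus a scalar in
 * [0, 16] gets smin_value == -16 and smax_value == 0, while the unsigned
 * bounds are blown to [0, U64_MAX] because umin_ptr (0) is smaller than
 * umax_val (16).
 */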
12558 /* bitwise ops on pointers are troublesome, prohibit. */
12559 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
12560 dst, bpf_alu_string[opcode >> 4]);
12563 /* other operators (e.g. MUL,LSH) produce non-pointer results */
12564 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
12565 dst, bpf_alu_string[opcode >> 4]);
12569 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
12571 reg_bounds_sync(dst_reg);
12572 if (sanitize_check_bounds(env, insn, dst_reg) < 0)
12574 if (sanitize_needed(opcode)) {
12575 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
12578 return sanitize_err(env, insn, ret, off_reg, dst_reg);
12584 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
12585 struct bpf_reg_state *src_reg)
12587 s32 smin_val = src_reg->s32_min_value;
12588 s32 smax_val = src_reg->s32_max_value;
12589 u32 umin_val = src_reg->u32_min_value;
12590 u32 umax_val = src_reg->u32_max_value;
12592 if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
12593 signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
12594 dst_reg->s32_min_value = S32_MIN;
12595 dst_reg->s32_max_value = S32_MAX;
12597 dst_reg->s32_min_value += smin_val;
12598 dst_reg->s32_max_value += smax_val;
12600 if (dst_reg->u32_min_value + umin_val < umin_val ||
12601 dst_reg->u32_max_value + umax_val < umax_val) {
12602 dst_reg->u32_min_value = 0;
12603 dst_reg->u32_max_value = U32_MAX;
12605 dst_reg->u32_min_value += umin_val;
12606 dst_reg->u32_max_value += umax_val;
12610 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
12611 struct bpf_reg_state *src_reg)
12613 s64 smin_val = src_reg->smin_value;
12614 s64 smax_val = src_reg->smax_value;
12615 u64 umin_val = src_reg->umin_value;
12616 u64 umax_val = src_reg->umax_value;
12618 if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
12619 signed_add_overflows(dst_reg->smax_value, smax_val)) {
12620 dst_reg->smin_value = S64_MIN;
12621 dst_reg->smax_value = S64_MAX;
12623 dst_reg->smin_value += smin_val;
12624 dst_reg->smax_value += smax_val;
12626 if (dst_reg->umin_value + umin_val < umin_val ||
12627 dst_reg->umax_value + umax_val < umax_val) {
12628 dst_reg->umin_value = 0;
12629 dst_reg->umax_value = U64_MAX;
12631 dst_reg->umin_value += umin_val;
12632 dst_reg->umax_value += umax_val;
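/* For example: dst in [10, 20] plus src in [3, 5] gives [13, 25] for both
 * the signed and unsigned bounds; if dst_reg->umax_value were U64_MAX and
 * umax_val were 1, the addition could wrap, so the unsigned bounds would be
 * reset to [0, U64_MAX].
 */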
12636 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
12637 struct bpf_reg_state *src_reg)
12639 s32 smin_val = src_reg->s32_min_value;
12640 s32 smax_val = src_reg->s32_max_value;
12641 u32 umin_val = src_reg->u32_min_value;
12642 u32 umax_val = src_reg->u32_max_value;
12644 if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
12645 signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
12646 /* Overflow possible, we know nothing */
12647 dst_reg->s32_min_value = S32_MIN;
12648 dst_reg->s32_max_value = S32_MAX;
12650 dst_reg->s32_min_value -= smax_val;
12651 dst_reg->s32_max_value -= smin_val;
12653 if (dst_reg->u32_min_value < umax_val) {
12654 /* Overflow possible, we know nothing */
12655 dst_reg->u32_min_value = 0;
12656 dst_reg->u32_max_value = U32_MAX;
12658 /* Cannot overflow (as long as bounds are consistent) */
12659 dst_reg->u32_min_value -= umax_val;
12660 dst_reg->u32_max_value -= umin_val;
12664 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
12665 struct bpf_reg_state *src_reg)
12667 s64 smin_val = src_reg->smin_value;
12668 s64 smax_val = src_reg->smax_value;
12669 u64 umin_val = src_reg->umin_value;
12670 u64 umax_val = src_reg->umax_value;
12672 if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
12673 signed_sub_overflows(dst_reg->smax_value, smin_val)) {
12674 /* Overflow possible, we know nothing */
12675 dst_reg->smin_value = S64_MIN;
12676 dst_reg->smax_value = S64_MAX;
12678 dst_reg->smin_value -= smax_val;
12679 dst_reg->smax_value -= smin_val;
12681 if (dst_reg->umin_value < umax_val) {
12682 /* Overflow possible, we know nothing */
12683 dst_reg->umin_value = 0;
12684 dst_reg->umax_value = U64_MAX;
12686 /* Cannot overflow (as long as bounds are consistent) */
12687 dst_reg->umin_value -= umax_val;
12688 dst_reg->umax_value -= umin_val;
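/* For example: dst in [10, 20] minus src in [3, 5] gives [5, 17]; with src
 * in [0, 15] instead, umin_value (10) < umax_val (15), so the unsigned
 * bounds would be reset to [0, U64_MAX].
 */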
12692 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
12693 struct bpf_reg_state *src_reg)
12695 s32 smin_val = src_reg->s32_min_value;
12696 u32 umin_val = src_reg->u32_min_value;
12697 u32 umax_val = src_reg->u32_max_value;
12699 if (smin_val < 0 || dst_reg->s32_min_value < 0) {
12700 /* Ain't nobody got time to multiply that sign */
12701 __mark_reg32_unbounded(dst_reg);
12704 /* Both values are positive, so we can work with unsigned and
12705 * copy the result to signed (unless it exceeds S32_MAX).
12707 if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
12708 /* Potential overflow, we know nothing */
12709 __mark_reg32_unbounded(dst_reg);
12712 dst_reg->u32_min_value *= umin_val;
12713 dst_reg->u32_max_value *= umax_val;
12714 if (dst_reg->u32_max_value > S32_MAX) {
12715 /* Overflow possible, we know nothing */
12716 dst_reg->s32_min_value = S32_MIN;
12717 dst_reg->s32_max_value = S32_MAX;
12719 dst_reg->s32_min_value = dst_reg->u32_min_value;
12720 dst_reg->s32_max_value = dst_reg->u32_max_value;
12724 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
12725 struct bpf_reg_state *src_reg)
12727 s64 smin_val = src_reg->smin_value;
12728 u64 umin_val = src_reg->umin_value;
12729 u64 umax_val = src_reg->umax_value;
12731 if (smin_val < 0 || dst_reg->smin_value < 0) {
12732 /* Ain't nobody got time to multiply that sign */
12733 __mark_reg64_unbounded(dst_reg);
12736 /* Both values are positive, so we can work with unsigned and
12737 * copy the result to signed (unless it exceeds S64_MAX).
12739 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
12740 /* Potential overflow, we know nothing */
12741 __mark_reg64_unbounded(dst_reg);
12744 dst_reg->umin_value *= umin_val;
12745 dst_reg->umax_value *= umax_val;
12746 if (dst_reg->umax_value > S64_MAX) {
12747 /* Overflow possible, we know nothing */
12748 dst_reg->smin_value = S64_MIN;
12749 dst_reg->smax_value = S64_MAX;
12751 dst_reg->smin_value = dst_reg->umin_value;
12752 dst_reg->smax_value = dst_reg->umax_value;
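/* For example: dst in [2, 10] times src in [3, 4] gives [6, 40] for both
 * bounds, since all values are nonnegative and below U32_MAX; if either
 * umax_value exceeded U32_MAX, the register would be marked unbounded.
 */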
12756 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
12757 struct bpf_reg_state *src_reg)
12759 bool src_known = tnum_subreg_is_const(src_reg->var_off);
12760 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
12761 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
12762 s32 smin_val = src_reg->s32_min_value;
12763 u32 umax_val = src_reg->u32_max_value;
12765 if (src_known && dst_known) {
12766 __mark_reg32_known(dst_reg, var32_off.value);
12770 /* We get our minimum from the var_off, since that's inherently
12771 * bitwise. Our maximum is the minimum of the operands' maxima.
12773 dst_reg->u32_min_value = var32_off.value;
12774 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
12775 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
12776 /* Lose signed bounds when ANDing negative numbers,
12777 * ain't nobody got time for that.
12779 dst_reg->s32_min_value = S32_MIN;
12780 dst_reg->s32_max_value = S32_MAX;
12782 /* ANDing two positives gives a positive, so safe to
12783 * cast result into s64.
12785 dst_reg->s32_min_value = dst_reg->u32_min_value;
12786 dst_reg->s32_max_value = dst_reg->u32_max_value;
12790 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
12791 struct bpf_reg_state *src_reg)
12793 bool src_known = tnum_is_const(src_reg->var_off);
12794 bool dst_known = tnum_is_const(dst_reg->var_off);
12795 s64 smin_val = src_reg->smin_value;
12796 u64 umax_val = src_reg->umax_value;
12798 if (src_known && dst_known) {
12799 __mark_reg_known(dst_reg, dst_reg->var_off.value);
12803 /* We get our minimum from the var_off, since that's inherently
12804 * bitwise. Our maximum is the minimum of the operands' maxima.
12806 dst_reg->umin_value = dst_reg->var_off.value;
12807 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
12808 if (dst_reg->smin_value < 0 || smin_val < 0) {
12809 /* Lose signed bounds when ANDing negative numbers,
12810 * ain't nobody got time for that.
12812 dst_reg->smin_value = S64_MIN;
12813 dst_reg->smax_value = S64_MAX;
12815 /* ANDing two positives gives a positive, so safe to
12816 * cast result into s64.
12818 dst_reg->smin_value = dst_reg->umin_value;
12819 dst_reg->smax_value = dst_reg->umax_value;
12821 /* We may learn something more from the var_off */
12822 __update_reg_bounds(dst_reg);
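/* For example: an unknown dst ANDed with a known constant 0xff (var_off
 * already narrowed to {value 0, mask 0xff} by the caller) ends up with
 * umin_value == 0 and umax_value == 0xff; the signed bounds are dropped
 * above and tightened again from var_off by __update_reg_bounds().
 */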
12825 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
12826 struct bpf_reg_state *src_reg)
12828 bool src_known = tnum_subreg_is_const(src_reg->var_off);
12829 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
12830 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
12831 s32 smin_val = src_reg->s32_min_value;
12832 u32 umin_val = src_reg->u32_min_value;
12834 if (src_known && dst_known) {
12835 __mark_reg32_known(dst_reg, var32_off.value);
12839 /* We get our maximum from the var_off, and our minimum is the
12840 * maximum of the operands' minima
12842 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
12843 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
12844 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
12845 /* Lose signed bounds when ORing negative numbers,
12846 * ain't nobody got time for that.
12848 dst_reg->s32_min_value = S32_MIN;
12849 dst_reg->s32_max_value = S32_MAX;
12851 /* ORing two positives gives a positive, so safe to
12852 * cast result into s64.
12854 dst_reg->s32_min_value = dst_reg->u32_min_value;
12855 dst_reg->s32_max_value = dst_reg->u32_max_value;
12859 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
12860 struct bpf_reg_state *src_reg)
12862 bool src_known = tnum_is_const(src_reg->var_off);
12863 bool dst_known = tnum_is_const(dst_reg->var_off);
12864 s64 smin_val = src_reg->smin_value;
12865 u64 umin_val = src_reg->umin_value;
12867 if (src_known && dst_known) {
12868 __mark_reg_known(dst_reg, dst_reg->var_off.value);
12872 /* We get our maximum from the var_off, and our minimum is the
12873 * maximum of the operands' minima
12875 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
12876 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
12877 if (dst_reg->smin_value < 0 || smin_val < 0) {
12878 /* Lose signed bounds when ORing negative numbers,
12879 * ain't nobody got time for that.
12881 dst_reg->smin_value = S64_MIN;
12882 dst_reg->smax_value = S64_MAX;
12884 /* ORing two positives gives a positive, so safe to
12885 * cast result into s64.
12887 dst_reg->smin_value = dst_reg->umin_value;
12888 dst_reg->smax_value = dst_reg->umax_value;
12890 /* We may learn something more from the var_off */
12891 __update_reg_bounds(dst_reg);
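/* For example: dst with var_off {value 0x10, mask 0xf} (i.e. [0x10, 0x1f])
 * ORed with a known constant 0x3 keeps umin_value == 0x10 and gets
 * umax_value == 0x13 | 0xc == 0x1f; both operands are nonnegative, so the
 * signed bounds follow the unsigned ones.
 */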
12894 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
12895 struct bpf_reg_state *src_reg)
12897 bool src_known = tnum_subreg_is_const(src_reg->var_off);
12898 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
12899 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
12900 s32 smin_val = src_reg->s32_min_value;
12902 if (src_known && dst_known) {
12903 __mark_reg32_known(dst_reg, var32_off.value);
12907 /* We get both minimum and maximum from the var32_off. */
12908 dst_reg->u32_min_value = var32_off.value;
12909 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
12911 if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
12912 /* XORing two positive sign numbers gives a positive,
12913 * so safe to cast u32 result into s32.
12915 dst_reg->s32_min_value = dst_reg->u32_min_value;
12916 dst_reg->s32_max_value = dst_reg->u32_max_value;
12918 dst_reg->s32_min_value = S32_MIN;
12919 dst_reg->s32_max_value = S32_MAX;
12923 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
12924 struct bpf_reg_state *src_reg)
12926 bool src_known = tnum_is_const(src_reg->var_off);
12927 bool dst_known = tnum_is_const(dst_reg->var_off);
12928 s64 smin_val = src_reg->smin_value;
12930 if (src_known && dst_known) {
12931 /* dst_reg->var_off.value has been updated earlier */
12932 __mark_reg_known(dst_reg, dst_reg->var_off.value);
12936 /* We get both minimum and maximum from the var_off. */
12937 dst_reg->umin_value = dst_reg->var_off.value;
12938 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
12940 if (dst_reg->smin_value >= 0 && smin_val >= 0) {
12941 /* XORing two positive sign numbers gives a positive,
12942 * so safe to cast u64 result into s64.
12944 dst_reg->smin_value = dst_reg->umin_value;
12945 dst_reg->smax_value = dst_reg->umax_value;
12947 dst_reg->smin_value = S64_MIN;
12948 dst_reg->smax_value = S64_MAX;
12951 __update_reg_bounds(dst_reg);
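/* For example: dst with var_off {value 0x10, mask 0xf} XORed with a known
 * constant 0xff yields var_off {value 0xe0, mask 0xf}, hence
 * umin_value == 0xe0 and umax_value == 0xef; both inputs are nonnegative,
 * so the signed bounds mirror the unsigned ones.
 */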
12954 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
12955 u64 umin_val, u64 umax_val)
12957 /* We lose all sign bit information (except what we can pick
12960 dst_reg->s32_min_value = S32_MIN;
12961 dst_reg->s32_max_value = S32_MAX;
12962 /* If we might shift our top bit out, then we know nothing */
12963 if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
12964 dst_reg->u32_min_value = 0;
12965 dst_reg->u32_max_value = U32_MAX;
12967 dst_reg->u32_min_value <<= umin_val;
12968 dst_reg->u32_max_value <<= umax_val;
12972 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
12973 struct bpf_reg_state *src_reg)
12975 u32 umax_val = src_reg->u32_max_value;
12976 u32 umin_val = src_reg->u32_min_value;
12977 /* u32 alu operation will zext upper bits */
12978 struct tnum subreg = tnum_subreg(dst_reg->var_off);
12980 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
12981 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
12982 /* Not strictly required, but to be careful mark the reg64 bounds as
12983 * unknown so that we are forced to pick them up from the tnum and zext
12984 * later; if some path skips this step we are still safe.
12986 __mark_reg64_unbounded(dst_reg);
12987 __update_reg32_bounds(dst_reg);
12990 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
12991 u64 umin_val, u64 umax_val)
12993 /* Special case <<32 because it is a common compiler pattern to sign
12994 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
12995 * positive we know this shift will also be positive so we can track
12996 * bounds correctly. Otherwise we lose all sign bit information except
12997 * what we can pick up from var_off. Perhaps we can generalize this
12998 * later to shifts of any length.
13000 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
13001 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
13003 dst_reg->smax_value = S64_MAX;
13005 if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
13006 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
13008 dst_reg->smin_value = S64_MIN;
13010 /* If we might shift our top bit out, then we know nothing */
13011 if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
13012 dst_reg->umin_value = 0;
13013 dst_reg->umax_value = U64_MAX;
13015 dst_reg->umin_value <<= umin_val;
13016 dst_reg->umax_value <<= umax_val;
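/* For example: a register whose 32-bit signed bounds are [0, 1000], shifted
 * left by exactly 32, keeps smin_value == 0 and smax_value == 1000LL << 32
 * (the "<<32 s>>32" sign-extension idiom); for any other shift amount, or
 * if the 32-bit bounds may be negative, the signed 64-bit bounds are dropped.
 */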
13020 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
13021 struct bpf_reg_state *src_reg)
13023 u64 umax_val = src_reg->umax_value;
13024 u64 umin_val = src_reg->umin_value;
13026 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
13027 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
13028 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
13030 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
13031 /* We may learn something more from the var_off */
13032 __update_reg_bounds(dst_reg);
13035 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
13036 struct bpf_reg_state *src_reg)
13038 struct tnum subreg = tnum_subreg(dst_reg->var_off);
13039 u32 umax_val = src_reg->u32_max_value;
13040 u32 umin_val = src_reg->u32_min_value;
13042 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
13043 * be negative, then either:
13044 * 1) src_reg might be zero, so the sign bit of the result is
13045 * unknown, so we lose our signed bounds
13046 * 2) it's known negative, thus the unsigned bounds capture the
13048 * 3) the signed bounds cross zero, so they tell us nothing
13050 * If the value in dst_reg is known nonnegative, then again the
13051 * unsigned bounds capture the signed bounds.
13052 * Thus, in all cases it suffices to blow away our signed bounds
13053 * and rely on inferring new ones from the unsigned bounds and
13054 * var_off of the result.
13056 dst_reg->s32_min_value = S32_MIN;
13057 dst_reg->s32_max_value = S32_MAX;
13059 dst_reg->var_off = tnum_rshift(subreg, umin_val);
13060 dst_reg->u32_min_value >>= umax_val;
13061 dst_reg->u32_max_value >>= umin_val;
13063 __mark_reg64_unbounded(dst_reg);
13064 __update_reg32_bounds(dst_reg);
13067 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
13068 struct bpf_reg_state *src_reg)
13070 u64 umax_val = src_reg->umax_value;
13071 u64 umin_val = src_reg->umin_value;
13073 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
13074 * be negative, then either:
13075 * 1) src_reg might be zero, so the sign bit of the result is
13076 * unknown, so we lose our signed bounds
13077 * 2) it's known negative, thus the unsigned bounds capture the
13079 * 3) the signed bounds cross zero, so they tell us nothing
13081 * If the value in dst_reg is known nonnegative, then again the
13082 * unsigned bounds capture the signed bounds.
13083 * Thus, in all cases it suffices to blow away our signed bounds
13084 * and rely on inferring new ones from the unsigned bounds and
13085 * var_off of the result.
13087 dst_reg->smin_value = S64_MIN;
13088 dst_reg->smax_value = S64_MAX;
13089 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
13090 dst_reg->umin_value >>= umax_val;
13091 dst_reg->umax_value >>= umin_val;
13093 /* It's not easy to operate on alu32 bounds here because it depends
13094 * on bits being shifted in. Take easy way out and mark unbounded
13095 * so we can recalculate later from tnum.
13097 __mark_reg32_unbounded(dst_reg);
13098 __update_reg_bounds(dst_reg);
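/* For example: dst with unsigned bounds [0x100, 0xfff] right-shifted by a
 * value in [4, 8] ends up with umin_value == 0x100 >> 8 == 1 and
 * umax_value == 0xfff >> 4 == 0xff; the signed bounds are recomputed from
 * these and the shifted var_off.
 */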
13101 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
13102 struct bpf_reg_state *src_reg)
13104 u64 umin_val = src_reg->u32_min_value;
13106 /* Upon reaching here, src_known is true and
13107 * umax_val is equal to umin_val.
13109 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
13110 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
13112 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
13114 /* blow away the dst_reg umin_value/umax_value and rely on
13115 * dst_reg var_off to refine the result.
13117 dst_reg->u32_min_value = 0;
13118 dst_reg->u32_max_value = U32_MAX;
13120 __mark_reg64_unbounded(dst_reg);
13121 __update_reg32_bounds(dst_reg);
13124 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
13125 struct bpf_reg_state *src_reg)
13127 u64 umin_val = src_reg->umin_value;
13129 /* Upon reaching here, src_known is true and umax_val is equal
13132 dst_reg->smin_value >>= umin_val;
13133 dst_reg->smax_value >>= umin_val;
13135 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
13137 /* blow away the dst_reg umin_value/umax_value and rely on
13138 * dst_reg var_off to refine the result.
13140 dst_reg->umin_value = 0;
13141 dst_reg->umax_value = U64_MAX;
13143 /* It's not easy to operate on alu32 bounds here because it depends
13144 * on bits being shifted in from upper 32-bits. Take easy way out
13145 * and mark unbounded so we can recalculate later from tnum.
13147 __mark_reg32_unbounded(dst_reg);
13148 __update_reg_bounds(dst_reg);
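/* For example: with a known shift of 4 and dst signed bounds [-64, 64], the
 * arithmetic shift gives [-4, 4]; the unsigned bounds are blown away and
 * later refined from the resulting var_off.
 */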
13151 /* WARNING: This function does calculations on 64-bit values, but the actual
13152 * execution may occur on 32-bit values. Therefore, things like bitshifts
13153 * need extra checks in the 32-bit case.
13155 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
13156 struct bpf_insn *insn,
13157 struct bpf_reg_state *dst_reg,
13158 struct bpf_reg_state src_reg)
13160 struct bpf_reg_state *regs = cur_regs(env);
13161 u8 opcode = BPF_OP(insn->code);
13163 s64 smin_val, smax_val;
13164 u64 umin_val, umax_val;
13165 s32 s32_min_val, s32_max_val;
13166 u32 u32_min_val, u32_max_val;
13167 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
13168 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
13171 smin_val = src_reg.smin_value;
13172 smax_val = src_reg.smax_value;
13173 umin_val = src_reg.umin_value;
13174 umax_val = src_reg.umax_value;
13176 s32_min_val = src_reg.s32_min_value;
13177 s32_max_val = src_reg.s32_max_value;
13178 u32_min_val = src_reg.u32_min_value;
13179 u32_max_val = src_reg.u32_max_value;
13182 src_known = tnum_subreg_is_const(src_reg.var_off);
13184 (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
13185 s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
13186 /* Taint dst register if offset had invalid bounds
13187 * derived from e.g. dead branches.
13189 __mark_reg_unknown(env, dst_reg);
13193 src_known = tnum_is_const(src_reg.var_off);
13195 (smin_val != smax_val || umin_val != umax_val)) ||
13196 smin_val > smax_val || umin_val > umax_val) {
13197 /* Taint dst register if offset had invalid bounds
13198 * derived from e.g. dead branches.
13200 __mark_reg_unknown(env, dst_reg);
13206 opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
13207 __mark_reg_unknown(env, dst_reg);
13211 if (sanitize_needed(opcode)) {
13212 ret = sanitize_val_alu(env, insn);
13214 return sanitize_err(env, insn, ret, NULL, NULL);
13217 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
13218 * There are two classes of instructions: For the first class we track both
13219 * alu32 and alu64 sign/unsigned bounds independently; this provides the
13220 * greatest amount of precision when alu operations are mixed with jmp32
13221 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
13222 * and BPF_OR. This is possible because these ops have fairly easy to
13223 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
13224 * See alu32 verifier tests for examples. The second class of
13225 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
13226 * with regards to tracking sign/unsigned bounds because the bits may
13227 * cross subreg boundaries in the alu64 case. When this happens we mark
13228 * the reg unbounded in the subreg bound space and use the resulting
13229 * tnum to calculate an approximation of the sign/unsigned bounds.
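/* For example: a 64-bit BPF_LSH by 16 moves bits from the lower subreg into
 * the upper one, so the 32-bit bounds cannot be updated directly; they are
 * marked unbounded and recomputed from the resulting tnum instead.
 */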
13233 scalar32_min_max_add(dst_reg, &src_reg);
13234 scalar_min_max_add(dst_reg, &src_reg);
13235 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
13238 scalar32_min_max_sub(dst_reg, &src_reg);
13239 scalar_min_max_sub(dst_reg, &src_reg);
13240 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
13243 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
13244 scalar32_min_max_mul(dst_reg, &src_reg);
13245 scalar_min_max_mul(dst_reg, &src_reg);
13248 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
13249 scalar32_min_max_and(dst_reg, &src_reg);
13250 scalar_min_max_and(dst_reg, &src_reg);
13253 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
13254 scalar32_min_max_or(dst_reg, &src_reg);
13255 scalar_min_max_or(dst_reg, &src_reg);
13258 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
13259 scalar32_min_max_xor(dst_reg, &src_reg);
13260 scalar_min_max_xor(dst_reg, &src_reg);
13263 if (umax_val >= insn_bitness) {
13264 /* Shifts greater than 31 or 63 are undefined.
13265 * This includes shifts by a negative number.
13267 mark_reg_unknown(env, regs, insn->dst_reg);
13271 scalar32_min_max_lsh(dst_reg, &src_reg);
13273 scalar_min_max_lsh(dst_reg, &src_reg);
13276 if (umax_val >= insn_bitness) {
13277 /* Shifts greater than 31 or 63 are undefined.
13278 * This includes shifts by a negative number.
13280 mark_reg_unknown(env, regs, insn->dst_reg);
13284 scalar32_min_max_rsh(dst_reg, &src_reg);
13286 scalar_min_max_rsh(dst_reg, &src_reg);
13289 if (umax_val >= insn_bitness) {
13290 /* Shifts greater than 31 or 63 are undefined.
13291 * This includes shifts by a negative number.
13293 mark_reg_unknown(env, regs, insn->dst_reg);
13297 scalar32_min_max_arsh(dst_reg, &src_reg);
13299 scalar_min_max_arsh(dst_reg, &src_reg);
13302 mark_reg_unknown(env, regs, insn->dst_reg);
13306 /* ALU32 ops are zero extended into 64bit register */
13308 zext_32_to_64(dst_reg);
13309 reg_bounds_sync(dst_reg);
13313 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
13316 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
13317 struct bpf_insn *insn)
13319 struct bpf_verifier_state *vstate = env->cur_state;
13320 struct bpf_func_state *state = vstate->frame[vstate->curframe];
13321 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
13322 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
13323 u8 opcode = BPF_OP(insn->code);
13326 dst_reg = &regs[insn->dst_reg];
13328 if (dst_reg->type != SCALAR_VALUE)
13331 /* Make sure ID is cleared otherwise dst_reg min/max could be
13332 * incorrectly propagated into other registers by find_equal_scalars()
13335 if (BPF_SRC(insn->code) == BPF_X) {
13336 src_reg = &regs[insn->src_reg];
13337 if (src_reg->type != SCALAR_VALUE) {
13338 if (dst_reg->type != SCALAR_VALUE) {
13339 /* Combining two pointers by any ALU op yields
13340 * an arbitrary scalar. Disallow all math except
13341 * pointer subtraction
13343 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
13344 mark_reg_unknown(env, regs, insn->dst_reg);
13347 verbose(env, "R%d pointer %s pointer prohibited\n",
13349 bpf_alu_string[opcode >> 4]);
13352 /* scalar += pointer
13353 * This is legal, but we have to reverse our
13354 * src/dest handling in computing the range
13356 err = mark_chain_precision(env, insn->dst_reg);
13359 return adjust_ptr_min_max_vals(env, insn,
13362 } else if (ptr_reg) {
13363 /* pointer += scalar */
13364 err = mark_chain_precision(env, insn->src_reg);
13367 return adjust_ptr_min_max_vals(env, insn,
13369 } else if (dst_reg->precise) {
13370 /* if dst_reg is precise, src_reg should be precise as well */
13371 err = mark_chain_precision(env, insn->src_reg);
13376 /* Pretend the src is a reg with a known value, since we only
13377 * need to be able to read from this state.
13379 off_reg.type = SCALAR_VALUE;
13380 __mark_reg_known(&off_reg, insn->imm);
13381 src_reg = &off_reg;
13382 if (ptr_reg) /* pointer += K */
13383 return adjust_ptr_min_max_vals(env, insn,
13387 /* Got here implies adding two SCALAR_VALUEs */
13388 if (WARN_ON_ONCE(ptr_reg)) {
13389 print_verifier_state(env, state, true);
13390 verbose(env, "verifier internal error: unexpected ptr_reg\n");
13393 if (WARN_ON(!src_reg)) {
13394 print_verifier_state(env, state, true);
13395 verbose(env, "verifier internal error: no src_reg\n");
13398 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
13401 /* check validity of 32-bit and 64-bit arithmetic operations */
13402 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
13404 struct bpf_reg_state *regs = cur_regs(env);
13405 u8 opcode = BPF_OP(insn->code);
13408 if (opcode == BPF_END || opcode == BPF_NEG) {
13409 if (opcode == BPF_NEG) {
13410 if (BPF_SRC(insn->code) != BPF_K ||
13411 insn->src_reg != BPF_REG_0 ||
13412 insn->off != 0 || insn->imm != 0) {
13413 verbose(env, "BPF_NEG uses reserved fields\n");
13417 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
13418 (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
13419 (BPF_CLASS(insn->code) == BPF_ALU64 &&
13420 BPF_SRC(insn->code) != BPF_TO_LE)) {
13421 verbose(env, "BPF_END uses reserved fields\n");
13426 /* check src operand */
13427 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
13431 if (is_pointer_value(env, insn->dst_reg)) {
13432 verbose(env, "R%d pointer arithmetic prohibited\n",
13437 /* check dest operand */
13438 err = check_reg_arg(env, insn->dst_reg, DST_OP);
13442 } else if (opcode == BPF_MOV) {
13444 if (BPF_SRC(insn->code) == BPF_X) {
13445 if (insn->imm != 0) {
13446 verbose(env, "BPF_MOV uses reserved fields\n");
13450 if (BPF_CLASS(insn->code) == BPF_ALU) {
13451 if (insn->off != 0 && insn->off != 8 && insn->off != 16) {
13452 verbose(env, "BPF_MOV uses reserved fields\n");
13456 if (insn->off != 0 && insn->off != 8 && insn->off != 16 &&
13458 verbose(env, "BPF_MOV uses reserved fields\n");
13463 /* check src operand */
13464 err = check_reg_arg(env, insn->src_reg, SRC_OP);
13468 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
13469 verbose(env, "BPF_MOV uses reserved fields\n");
13474 /* check dest operand, mark as required later */
13475 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
13479 if (BPF_SRC(insn->code) == BPF_X) {
13480 struct bpf_reg_state *src_reg = regs + insn->src_reg;
13481 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
13482 bool need_id = src_reg->type == SCALAR_VALUE && !src_reg->id &&
13483 !tnum_is_const(src_reg->var_off);
13485 if (BPF_CLASS(insn->code) == BPF_ALU64) {
13486 if (insn->off == 0) {
13488 * copy register state to dest reg
13491 /* Assign src and dst registers the same ID
13492 * that will be used by find_equal_scalars()
13493 * to propagate min/max range.
13495 src_reg->id = ++env->id_gen;
13496 copy_register_state(dst_reg, src_reg);
13497 dst_reg->live |= REG_LIVE_WRITTEN;
13498 dst_reg->subreg_def = DEF_NOT_SUBREG;
13500 /* case: R1 = (s8, s16, s32)R2 */
13501 if (is_pointer_value(env, insn->src_reg)) {
13503 "R%d sign-extension part of pointer\n",
13506 } else if (src_reg->type == SCALAR_VALUE) {
13509 no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
13510 if (no_sext && need_id)
13511 src_reg->id = ++env->id_gen;
13512 copy_register_state(dst_reg, src_reg);
13515 coerce_reg_to_size_sx(dst_reg, insn->off >> 3);
13516 dst_reg->live |= REG_LIVE_WRITTEN;
13517 dst_reg->subreg_def = DEF_NOT_SUBREG;
13519 mark_reg_unknown(env, regs, insn->dst_reg);
13523 /* R1 = (u32) R2 */
13524 if (is_pointer_value(env, insn->src_reg)) {
13526 "R%d partial copy of pointer\n",
13529 } else if (src_reg->type == SCALAR_VALUE) {
13530 if (insn->off == 0) {
13531 bool is_src_reg_u32 = src_reg->umax_value <= U32_MAX;
13533 if (is_src_reg_u32 && need_id)
13534 src_reg->id = ++env->id_gen;
13535 copy_register_state(dst_reg, src_reg);
13536 /* Make sure ID is cleared if src_reg is not in u32
13537 * range otherwise dst_reg min/max could be incorrectly
13538 * propagated into src_reg by find_equal_scalars()
13540 if (!is_src_reg_u32)
13542 dst_reg->live |= REG_LIVE_WRITTEN;
13543 dst_reg->subreg_def = env->insn_idx + 1;
13545 /* case: W1 = (s8, s16)W2 */
13546 bool no_sext = src_reg->umax_value < (1ULL << (insn->off - 1));
13548 if (no_sext && need_id)
13549 src_reg->id = ++env->id_gen;
13550 copy_register_state(dst_reg, src_reg);
13553 dst_reg->live |= REG_LIVE_WRITTEN;
13554 dst_reg->subreg_def = env->insn_idx + 1;
13555 coerce_subreg_to_size_sx(dst_reg, insn->off >> 3);
13558 mark_reg_unknown(env, regs,
13561 zext_32_to_64(dst_reg);
13562 reg_bounds_sync(dst_reg);
13566 * remember the value we stored into this reg
13568 /* clear any state __mark_reg_known doesn't set */
13569 mark_reg_unknown(env, regs, insn->dst_reg);
13570 regs[insn->dst_reg].type = SCALAR_VALUE;
13571 if (BPF_CLASS(insn->code) == BPF_ALU64) {
13572 __mark_reg_known(regs + insn->dst_reg,
13575 __mark_reg_known(regs + insn->dst_reg,
13580 } else if (opcode > BPF_END) {
13581 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
13584 } else { /* all other ALU ops: and, sub, xor, add, ... */
13586 if (BPF_SRC(insn->code) == BPF_X) {
13587 if (insn->imm != 0 || insn->off > 1 ||
13588 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
13589 verbose(env, "BPF_ALU uses reserved fields\n");
13592 /* check src1 operand */
13593 err = check_reg_arg(env, insn->src_reg, SRC_OP);
13597 if (insn->src_reg != BPF_REG_0 || insn->off > 1 ||
13598 (insn->off == 1 && opcode != BPF_MOD && opcode != BPF_DIV)) {
13599 verbose(env, "BPF_ALU uses reserved fields\n");
13604 /* check src2 operand */
13605 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
13609 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
13610 BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
13611 verbose(env, "div by zero\n");
13615 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
13616 opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
13617 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
13619 if (insn->imm < 0 || insn->imm >= size) {
13620 verbose(env, "invalid shift %d\n", insn->imm);
13625 /* check dest operand */
13626 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
13630 return adjust_reg_min_max_vals(env, insn);
13636 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
13637 struct bpf_reg_state *dst_reg,
13638 enum bpf_reg_type type,
13639 bool range_right_open)
13641 struct bpf_func_state *state;
13642 struct bpf_reg_state *reg;
13645 if (dst_reg->off < 0 ||
13646 (dst_reg->off == 0 && range_right_open))
13647 /* This doesn't give us any range */
13650 if (dst_reg->umax_value > MAX_PACKET_OFF ||
13651 dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
13652 /* Risk of overflow. For instance, ptr + (1<<63) may be less
13653 * than pkt_end, but that's because it's also less than pkt.
13657 new_range = dst_reg->off;
13658 if (range_right_open)
13661 /* Examples for register markings:
13663 * pkt_data in dst register:
13667 * if (r2 > pkt_end) goto <handle exception>
13672 * if (r2 < pkt_end) goto <access okay>
13673 * <handle exception>
13676 * r2 == dst_reg, pkt_end == src_reg
13677 * r2=pkt(id=n,off=8,r=0)
13678 * r3=pkt(id=n,off=0,r=0)
13680 * pkt_data in src register:
13684 * if (pkt_end >= r2) goto <access okay>
13685 * <handle exception>
13689 * if (pkt_end <= r2) goto <handle exception>
13693 * pkt_end == dst_reg, r2 == src_reg
13694 * r2=pkt(id=n,off=8,r=0)
13695 * r3=pkt(id=n,off=0,r=0)
13697 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
13698 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
13699 * and [r3, r3 + 8-1) respectively is safe to access depending on
13703 /* If our ids match, then we must have the same max_value. And we
13704 * don't care about the other reg's fixed offset, since if it's too big
13705 * the range won't allow anything.
13706 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
13708 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
13709 if (reg->type == type && reg->id == dst_reg->id)
13710 /* keep the maximum range already checked */
13711 reg->range = max(reg->range, new_range);
13715 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
13717 struct tnum subreg = tnum_subreg(reg->var_off);
13718 s32 sval = (s32)val;
13722 if (tnum_is_const(subreg))
13723 return !!tnum_equals_const(subreg, val);
13724 else if (val < reg->u32_min_value || val > reg->u32_max_value)
13728 if (tnum_is_const(subreg))
13729 return !tnum_equals_const(subreg, val);
13730 else if (val < reg->u32_min_value || val > reg->u32_max_value)
13734 if ((~subreg.mask & subreg.value) & val)
13736 if (!((subreg.mask | subreg.value) & val))
13740 if (reg->u32_min_value > val)
13742 else if (reg->u32_max_value <= val)
13746 if (reg->s32_min_value > sval)
13748 else if (reg->s32_max_value <= sval)
13752 if (reg->u32_max_value < val)
13754 else if (reg->u32_min_value >= val)
13758 if (reg->s32_max_value < sval)
13760 else if (reg->s32_min_value >= sval)
13764 if (reg->u32_min_value >= val)
13766 else if (reg->u32_max_value < val)
13770 if (reg->s32_min_value >= sval)
13772 else if (reg->s32_max_value < sval)
13776 if (reg->u32_max_value <= val)
13778 else if (reg->u32_min_value > val)
13782 if (reg->s32_max_value <= sval)
13784 else if (reg->s32_min_value > sval)
13793 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
13795 s64 sval = (s64)val;
13799 if (tnum_is_const(reg->var_off))
13800 return !!tnum_equals_const(reg->var_off, val);
13801 else if (val < reg->umin_value || val > reg->umax_value)
13805 if (tnum_is_const(reg->var_off))
13806 return !tnum_equals_const(reg->var_off, val);
13807 else if (val < reg->umin_value || val > reg->umax_value)
13811 if ((~reg->var_off.mask & reg->var_off.value) & val)
13813 if (!((reg->var_off.mask | reg->var_off.value) & val))
13817 if (reg->umin_value > val)
13819 else if (reg->umax_value <= val)
13823 if (reg->smin_value > sval)
13825 else if (reg->smax_value <= sval)
13829 if (reg->umax_value < val)
13831 else if (reg->umin_value >= val)
13835 if (reg->smax_value < sval)
13837 else if (reg->smin_value >= sval)
13841 if (reg->umin_value >= val)
13843 else if (reg->umax_value < val)
13847 if (reg->smin_value >= sval)
13849 else if (reg->smax_value < sval)
13853 if (reg->umax_value <= val)
13855 else if (reg->umin_value > val)
13859 if (reg->smax_value <= sval)
13861 else if (reg->smin_value > sval)
13869 /* compute branch direction of the expression "if (reg opcode val) goto target;"
13871 * 1 - branch will be taken and "goto target" will be executed
13872 * 0 - branch will not be taken and fall-through to next insn
13873 * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
13876 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
13879 if (__is_pointer_value(false, reg)) {
13880 if (!reg_not_null(reg))
13883 /* If pointer is valid tests against zero will fail so we can
13884 * use this to direct branch taken.
13900 return is_branch32_taken(reg, val, opcode);
13901 return is_branch64_taken(reg, val, opcode);
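/* For example: a register with umin_value == 10 makes "if (reg > 5)" always
 * taken (1), a register with umax_value <= 5 makes it never taken (0), and
 * anything in between is reported as unknown (-1).
 */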
13904 static int flip_opcode(u32 opcode)
13906 /* How can we transform "a <op> b" into "b <op> a"? */
13907 static const u8 opcode_flip[16] = {
13908 /* these stay the same */
13909 [BPF_JEQ >> 4] = BPF_JEQ,
13910 [BPF_JNE >> 4] = BPF_JNE,
13911 [BPF_JSET >> 4] = BPF_JSET,
13912 /* these swap "lesser" and "greater" (L and G in the opcodes) */
13913 [BPF_JGE >> 4] = BPF_JLE,
13914 [BPF_JGT >> 4] = BPF_JLT,
13915 [BPF_JLE >> 4] = BPF_JGE,
13916 [BPF_JLT >> 4] = BPF_JGT,
13917 [BPF_JSGE >> 4] = BPF_JSLE,
13918 [BPF_JSGT >> 4] = BPF_JSLT,
13919 [BPF_JSLE >> 4] = BPF_JSGE,
13920 [BPF_JSLT >> 4] = BPF_JSGT
13922 return opcode_flip[opcode >> 4];
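/* For example: "r1 < r2" is equivalent to "r2 > r1", so BPF_JLT flips to
 * BPF_JGT, while BPF_JEQ, BPF_JNE and BPF_JSET are symmetric and stay
 * unchanged.
 */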
13925 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
13926 struct bpf_reg_state *src_reg,
13929 struct bpf_reg_state *pkt;
13931 if (src_reg->type == PTR_TO_PACKET_END) {
13933 } else if (dst_reg->type == PTR_TO_PACKET_END) {
13935 opcode = flip_opcode(opcode);
13940 if (pkt->range >= 0)
13945 /* pkt <= pkt_end */
13948 /* pkt > pkt_end */
13949 if (pkt->range == BEYOND_PKT_END)
13950 /* pkt has at least one extra byte beyond pkt_end */
13951 return opcode == BPF_JGT;
13954 /* pkt < pkt_end */
13957 /* pkt >= pkt_end */
13958 if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
13959 return opcode == BPF_JGE;
13965 /* Adjusts the register min/max values in the case that the dst_reg is the
13966 * variable register that we are working on, and src_reg is a constant or we're
13967 * simply doing a BPF_K check.
13968 * In JEQ/JNE cases we also adjust the var_off values.
13970 static void reg_set_min_max(struct bpf_reg_state *true_reg,
13971 struct bpf_reg_state *false_reg,
13972 u64 val, u32 val32,
13973 u8 opcode, bool is_jmp32)
13975 struct tnum false_32off = tnum_subreg(false_reg->var_off);
13976 struct tnum false_64off = false_reg->var_off;
13977 struct tnum true_32off = tnum_subreg(true_reg->var_off);
13978 struct tnum true_64off = true_reg->var_off;
13979 s64 sval = (s64)val;
13980 s32 sval32 = (s32)val32;
13982 /* If the dst_reg is a pointer, we can't learn anything about its
13983 * variable offset from the compare (unless src_reg were a pointer into
13984 * the same object, but we don't bother with that.
13985 * Since false_reg and true_reg have the same type by construction, we
13986 * only need to check one of them for pointerness.
13988 if (__is_pointer_value(false, false_reg))
13992 /* JEQ/JNE comparison doesn't change the register equivalence.
13995 * if (r1 == 42) goto label;
13997 * label: // here both r1 and r2 are known to be 42.
13999 * Hence when marking register as known preserve its ID.
14003 __mark_reg32_known(true_reg, val32);
14004 true_32off = tnum_subreg(true_reg->var_off);
14006 ___mark_reg_known(true_reg, val);
14007 true_64off = true_reg->var_off;
14012 __mark_reg32_known(false_reg, val32);
14013 false_32off = tnum_subreg(false_reg->var_off);
14015 ___mark_reg_known(false_reg, val);
14016 false_64off = false_reg->var_off;
14021 false_32off = tnum_and(false_32off, tnum_const(~val32));
14022 if (is_power_of_2(val32))
14023 true_32off = tnum_or(true_32off,
14024 tnum_const(val32));
14026 false_64off = tnum_and(false_64off, tnum_const(~val));
14027 if (is_power_of_2(val))
14028 true_64off = tnum_or(true_64off,
14036 u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
14037 u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
14039 false_reg->u32_max_value = min(false_reg->u32_max_value,
14041 true_reg->u32_min_value = max(true_reg->u32_min_value,
14044 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
14045 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
14047 false_reg->umax_value = min(false_reg->umax_value, false_umax);
14048 true_reg->umin_value = max(true_reg->umin_value, true_umin);
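/* For example: for a 64-bit "if (r1 > 10)", the true branch gets umin_value
 * raised to 11 while the false branch gets umax_value capped at 10.
 */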
14056 s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
14057 s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
14059 false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
14060 true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
14062 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
14063 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
14065 false_reg->smax_value = min(false_reg->smax_value, false_smax);
14066 true_reg->smin_value = max(true_reg->smin_value, true_smin);
14074 u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
14075 u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
14077 false_reg->u32_min_value = max(false_reg->u32_min_value,
14079 true_reg->u32_max_value = min(true_reg->u32_max_value,
14082 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
14083 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
14085 false_reg->umin_value = max(false_reg->umin_value, false_umin);
14086 true_reg->umax_value = min(true_reg->umax_value, true_umax);
14094 s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
14095 s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
14097 false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
14098 true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
14100 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
14101 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
14103 false_reg->smin_value = max(false_reg->smin_value, false_smin);
14104 true_reg->smax_value = min(true_reg->smax_value, true_smax);
14113 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
14114 tnum_subreg(false_32off));
14115 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
14116 tnum_subreg(true_32off));
14117 __reg_combine_32_into_64(false_reg);
14118 __reg_combine_32_into_64(true_reg);
14120 false_reg->var_off = false_64off;
14121 true_reg->var_off = true_64off;
14122 __reg_combine_64_into_32(false_reg);
14123 __reg_combine_64_into_32(true_reg);
14127 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
14128 * the variable reg.
14130 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
14131 struct bpf_reg_state *false_reg,
14132 u64 val, u32 val32,
14133 u8 opcode, bool is_jmp32)
14135 opcode = flip_opcode(opcode);
14136 /* This uses zero as "not present in table"; luckily the zero opcode,
14137 * BPF_JA, can't get here.
14140 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
14143 /* Regs are known to be equal, so intersect their min/max/var_off */
14144 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
14145 struct bpf_reg_state *dst_reg)
14147 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
14148 dst_reg->umin_value);
14149 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
14150 dst_reg->umax_value);
14151 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
14152 dst_reg->smin_value);
14153 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
14154 dst_reg->smax_value);
14155 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
14157 reg_bounds_sync(src_reg);
14158 reg_bounds_sync(dst_reg);
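/* For example: after "if (r1 == r2)" with r1 in [5, 20] and r2 in [10, 30],
 * both registers are narrowed to [10, 20] in the equal branch.
 */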
14161 static void reg_combine_min_max(struct bpf_reg_state *true_src,
14162 struct bpf_reg_state *true_dst,
14163 struct bpf_reg_state *false_src,
14164 struct bpf_reg_state *false_dst,
14169 __reg_combine_min_max(true_src, true_dst);
14172 __reg_combine_min_max(false_src, false_dst);
14177 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
14178 struct bpf_reg_state *reg, u32 id,
14181 if (type_may_be_null(reg->type) && reg->id == id &&
14182 (is_rcu_reg(reg) || !WARN_ON_ONCE(!reg->id))) {
14183 /* Old offset (both fixed and variable parts) should have been
14184 * known-zero, because we don't allow pointer arithmetic on
14185 * pointers that might be NULL. If we see this happening, don't
14186 * convert the register.
14188 * But in some cases, some helpers that return local kptrs
14189 * advance the offset for the returned pointer. In those cases, it

14190 * is fine to expect to see reg->off.
14192 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || !tnum_equals_const(reg->var_off, 0)))
14194 if (!(type_is_ptr_alloc_obj(reg->type) || type_is_non_owning_ref(reg->type)) &&
14195 WARN_ON_ONCE(reg->off))
14199 reg->type = SCALAR_VALUE;
14200 /* We don't need id and ref_obj_id from this point
14201 * onwards anymore, thus we should better reset it,
14202 * so that state pruning has chances to take effect.
14205 reg->ref_obj_id = 0;
14210 mark_ptr_not_null_reg(reg);
14212 if (!reg_may_point_to_spin_lock(reg)) {
14213 /* For not-NULL ptr, reg->ref_obj_id will be reset
14214 * in release_reference().
14216 * reg->id is still used by spin_lock ptr. Other
14217 * than spin_lock ptr type, reg->id can be reset.
14224 /* The logic is similar to find_good_pkt_pointers(), both could eventually
14225 * be folded together at some point.
14227 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
14230 struct bpf_func_state *state = vstate->frame[vstate->curframe];
14231 struct bpf_reg_state *regs = state->regs, *reg;
14232 u32 ref_obj_id = regs[regno].ref_obj_id;
14233 u32 id = regs[regno].id;
14235 if (ref_obj_id && ref_obj_id == id && is_null)
14236 /* regs[regno] is in the " == NULL" branch.
14237 * No one could have freed the reference state before
14238 * doing the NULL check.
14240 WARN_ON_ONCE(release_reference_state(state, id));
14242 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14243 mark_ptr_or_null_reg(state, reg, id, is_null);
14247 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
14248 struct bpf_reg_state *dst_reg,
14249 struct bpf_reg_state *src_reg,
14250 struct bpf_verifier_state *this_branch,
14251 struct bpf_verifier_state *other_branch)
14253 if (BPF_SRC(insn->code) != BPF_X)
14256 /* Pointers are always 64-bit. */
14257 if (BPF_CLASS(insn->code) == BPF_JMP32)
14260 switch (BPF_OP(insn->code)) {
14262 if ((dst_reg->type == PTR_TO_PACKET &&
14263 src_reg->type == PTR_TO_PACKET_END) ||
14264 (dst_reg->type == PTR_TO_PACKET_META &&
14265 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
14266 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
14267 find_good_pkt_pointers(this_branch, dst_reg,
14268 dst_reg->type, false);
14269 mark_pkt_end(other_branch, insn->dst_reg, true);
14270 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
14271 src_reg->type == PTR_TO_PACKET) ||
14272 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
14273 src_reg->type == PTR_TO_PACKET_META)) {
14274 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
14275 find_good_pkt_pointers(other_branch, src_reg,
14276 src_reg->type, true);
14277 mark_pkt_end(this_branch, insn->src_reg, false);
14283 if ((dst_reg->type == PTR_TO_PACKET &&
14284 src_reg->type == PTR_TO_PACKET_END) ||
14285 (dst_reg->type == PTR_TO_PACKET_META &&
14286 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
14287 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
14288 find_good_pkt_pointers(other_branch, dst_reg,
14289 dst_reg->type, true);
14290 mark_pkt_end(this_branch, insn->dst_reg, false);
14291 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
14292 src_reg->type == PTR_TO_PACKET) ||
14293 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
14294 src_reg->type == PTR_TO_PACKET_META)) {
14295 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
14296 find_good_pkt_pointers(this_branch, src_reg,
14297 src_reg->type, false);
14298 mark_pkt_end(other_branch, insn->src_reg, true);
14304 if ((dst_reg->type == PTR_TO_PACKET &&
14305 src_reg->type == PTR_TO_PACKET_END) ||
14306 (dst_reg->type == PTR_TO_PACKET_META &&
14307 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
14308 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
14309 find_good_pkt_pointers(this_branch, dst_reg,
14310 dst_reg->type, true);
14311 mark_pkt_end(other_branch, insn->dst_reg, false);
14312 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
14313 src_reg->type == PTR_TO_PACKET) ||
14314 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
14315 src_reg->type == PTR_TO_PACKET_META)) {
14316 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
14317 find_good_pkt_pointers(other_branch, src_reg,
14318 src_reg->type, false);
14319 mark_pkt_end(this_branch, insn->src_reg, true);
14325 if ((dst_reg->type == PTR_TO_PACKET &&
14326 src_reg->type == PTR_TO_PACKET_END) ||
14327 (dst_reg->type == PTR_TO_PACKET_META &&
14328 reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
14329 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
14330 find_good_pkt_pointers(other_branch, dst_reg,
14331 dst_reg->type, false);
14332 mark_pkt_end(this_branch, insn->dst_reg, true);
14333 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
14334 src_reg->type == PTR_TO_PACKET) ||
14335 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
14336 src_reg->type == PTR_TO_PACKET_META)) {
14337 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
14338 find_good_pkt_pointers(this_branch, src_reg,
14339 src_reg->type, true);
14340 mark_pkt_end(other_branch, insn->src_reg, false);
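/* Illustrative sketch (assumed example program, not from this file): the
 * direct packet access pattern recognized above typically comes from C like
 *
 *   void *data     = (void *)(long)skb->data;      - PTR_TO_PACKET
 *   void *data_end = (void *)(long)skb->data_end;  - PTR_TO_PACKET_END
 *   if (data + 14 > data_end)
 *       return TC_ACT_SHOT;
 *   ... parse the 14-byte Ethernet header ...
 *
 * One common compilation is "rX = data + 14; if rX > rY goto drop" with
 * rY = data_end, which hits the BPF_JGT case: the fall-through branch (where
 * data + 14 <= data_end holds) has its packet pointers marked safe for 14
 * bytes by find_good_pkt_pointers(), and the taken branch records the
 * pkt_end relation via mark_pkt_end().
 */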
14352 static void find_equal_scalars(struct bpf_verifier_state *vstate,
14353 struct bpf_reg_state *known_reg)
14355 struct bpf_func_state *state;
14356 struct bpf_reg_state *reg;
14358 bpf_for_each_reg_in_vstate(vstate, state, reg, ({
14359 if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
14360 copy_register_state(reg, known_reg);
14364 static int check_cond_jmp_op(struct bpf_verifier_env *env,
14365 struct bpf_insn *insn, int *insn_idx)
14367 struct bpf_verifier_state *this_branch = env->cur_state;
14368 struct bpf_verifier_state *other_branch;
14369 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
14370 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
14371 struct bpf_reg_state *eq_branch_regs;
14372 u8 opcode = BPF_OP(insn->code);
14377 /* Only conditional jumps are expected to reach here. */
14378 if (opcode == BPF_JA || opcode > BPF_JSLE) {
14379 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
14383 /* check src2 operand */
14384 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
14388 dst_reg = &regs[insn->dst_reg];
14389 if (BPF_SRC(insn->code) == BPF_X) {
14390 if (insn->imm != 0) {
14391 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
14395 /* check src1 operand */
14396 err = check_reg_arg(env, insn->src_reg, SRC_OP);
14400 src_reg = &regs[insn->src_reg];
14401 if (!(reg_is_pkt_pointer_any(dst_reg) && reg_is_pkt_pointer_any(src_reg)) &&
14402 is_pointer_value(env, insn->src_reg)) {
14403 verbose(env, "R%d pointer comparison prohibited\n",
14408 if (insn->src_reg != BPF_REG_0) {
14409 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
14414 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
14416 if (BPF_SRC(insn->code) == BPF_K) {
14417 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
14418 } else if (src_reg->type == SCALAR_VALUE &&
14419 is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
14420 pred = is_branch_taken(dst_reg,
14421 tnum_subreg(src_reg->var_off).value,
14424 } else if (src_reg->type == SCALAR_VALUE &&
14425 !is_jmp32 && tnum_is_const(src_reg->var_off)) {
14426 pred = is_branch_taken(dst_reg,
14427 src_reg->var_off.value,
14430 } else if (dst_reg->type == SCALAR_VALUE &&
14431 is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off))) {
14432 pred = is_branch_taken(src_reg,
14433 tnum_subreg(dst_reg->var_off).value,
14434 flip_opcode(opcode),
14436 } else if (dst_reg->type == SCALAR_VALUE &&
14437 !is_jmp32 && tnum_is_const(dst_reg->var_off)) {
14438 pred = is_branch_taken(src_reg,
14439 dst_reg->var_off.value,
14440 flip_opcode(opcode),
14442 } else if (reg_is_pkt_pointer_any(dst_reg) &&
14443 reg_is_pkt_pointer_any(src_reg) &&
14445 pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
14449 /* If we get here with a dst_reg pointer type it is because
14450 * above is_branch_taken() special cased the 0 comparison.
14452 if (!__is_pointer_value(false, dst_reg))
14453 err = mark_chain_precision(env, insn->dst_reg);
14454 if (BPF_SRC(insn->code) == BPF_X && !err &&
14455 !__is_pointer_value(false, src_reg))
14456 err = mark_chain_precision(env, insn->src_reg);
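/* Illustrative sketch (assumed values, not from this file): if dst_reg is a
 * scalar known to lie in [5, 10] and the insn is "if r1 > 20 goto +N"
 * (BPF_JGT, imm = 20), is_branch_taken() proves the branch is never taken
 * and returns 0; for "if r1 > 3" it returns 1 (always taken); for
 * "if r1 > 7" the outcome is unknown, it returns -1, and both branches are
 * explored below.
 */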
14462 /* Only follow the goto, ignore fall-through. If needed, push
14463 * the fall-through branch for simulation under speculative execution.
14466 if (!env->bypass_spec_v1 &&
14467 !sanitize_speculative_path(env, insn, *insn_idx + 1,
14470 if (env->log.level & BPF_LOG_LEVEL)
14471 print_insn_state(env, this_branch->frame[this_branch->curframe]);
14472 *insn_idx += insn->off;
14474 } else if (pred == 0) {
14475 /* Only follow the fall-through branch, since that's where the
14476 * program will go. If needed, push the goto branch for
14477 * simulation under speculative execution.
14479 if (!env->bypass_spec_v1 &&
14480 !sanitize_speculative_path(env, insn,
14481 *insn_idx + insn->off + 1,
14484 if (env->log.level & BPF_LOG_LEVEL)
14485 print_insn_state(env, this_branch->frame[this_branch->curframe]);
14489 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
14493 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
14495 /* detect if we are comparing against a constant value so we can adjust
14496 * our min/max values for our dst register.
14497 * this is only legit if both are scalars (or pointers to the same
14498 * object, I suppose, see the PTR_MAYBE_NULL related if block below),
14499 * because otherwise the different base pointers mean the offsets aren't
14502 if (BPF_SRC(insn->code) == BPF_X) {
14503 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
14505 if (dst_reg->type == SCALAR_VALUE &&
14506 src_reg->type == SCALAR_VALUE) {
14507 if (tnum_is_const(src_reg->var_off) ||
14509 tnum_is_const(tnum_subreg(src_reg->var_off))))
14510 reg_set_min_max(&other_branch_regs[insn->dst_reg],
14512 src_reg->var_off.value,
14513 tnum_subreg(src_reg->var_off).value,
14515 else if (tnum_is_const(dst_reg->var_off) ||
14517 tnum_is_const(tnum_subreg(dst_reg->var_off))))
14518 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
14520 dst_reg->var_off.value,
14521 tnum_subreg(dst_reg->var_off).value,
14523 else if (!is_jmp32 &&
14524 (opcode == BPF_JEQ || opcode == BPF_JNE))
14525 /* Comparing for equality, we can combine knowledge */
14526 reg_combine_min_max(&other_branch_regs[insn->src_reg],
14527 &other_branch_regs[insn->dst_reg],
14528 src_reg, dst_reg, opcode);
14530 !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
14531 find_equal_scalars(this_branch, src_reg);
14532 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
14536 } else if (dst_reg->type == SCALAR_VALUE) {
14537 reg_set_min_max(&other_branch_regs[insn->dst_reg],
14538 dst_reg, insn->imm, (u32)insn->imm,
14542 if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
14543 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
14544 find_equal_scalars(this_branch, dst_reg);
14545 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
14548 /* if one pointer register is compared to another pointer
14549 * register check if PTR_MAYBE_NULL could be lifted.
14550 * E.g. register A - maybe null
14551 * register B - not null
14552 * for JNE A, B, ... - A is not null in the false branch;
14553 * for JEQ A, B, ... - A is not null in the true branch.
14555 * Since PTR_TO_BTF_ID points to a kernel struct that does
14556 * not need to be null checked by the BPF program, i.e. it
14557 * could be null even without the PTR_MAYBE_NULL marking,
14558 * only propagate nullness when neither reg is of that type.
14560 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_X &&
14561 __is_pointer_value(false, src_reg) && __is_pointer_value(false, dst_reg) &&
14562 type_may_be_null(src_reg->type) != type_may_be_null(dst_reg->type) &&
14563 base_type(src_reg->type) != PTR_TO_BTF_ID &&
14564 base_type(dst_reg->type) != PTR_TO_BTF_ID) {
14565 eq_branch_regs = NULL;
14568 eq_branch_regs = other_branch_regs;
14571 eq_branch_regs = regs;
14577 if (eq_branch_regs) {
14578 if (type_may_be_null(src_reg->type))
14579 mark_ptr_not_null_reg(&eq_branch_regs[insn->src_reg]);
14581 mark_ptr_not_null_reg(&eq_branch_regs[insn->dst_reg]);
14585 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
14586 * NOTE: these optimizations below are related with pointer comparison
14587 * which will never be JMP32.
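/* Illustrative sketch (assumed example program, not from this file): the
 * classic pattern handled here is
 *
 *   val = bpf_map_lookup_elem(&map, &key);   - PTR_TO_MAP_VALUE_OR_NULL
 *   if (!val)
 *       return 0;
 *   *val += 1;                               - only reached with val != NULL
 *
 * The "if (!val)" compiles to a JEQ against 0, so in the branch where the
 * test fails mark_ptr_or_null_regs() converts val (and every register
 * sharing its id) to plain PTR_TO_MAP_VALUE, while in the == NULL branch the
 * register becomes a plain scalar and is no longer usable as a pointer.
 */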
14589 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
14590 insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
14591 type_may_be_null(dst_reg->type)) {
14592 /* Mark all identical registers in each branch as either
14593 * safe or unknown depending on the R == 0 or R != 0 conditional.
14595 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
14596 opcode == BPF_JNE);
14597 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
14598 opcode == BPF_JEQ);
14599 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
14600 this_branch, other_branch) &&
14601 is_pointer_value(env, insn->dst_reg)) {
14602 verbose(env, "R%d pointer comparison prohibited\n",
14606 if (env->log.level & BPF_LOG_LEVEL)
14607 print_insn_state(env, this_branch->frame[this_branch->curframe]);
14611 /* verify BPF_LD_IMM64 instruction */
14612 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
14614 struct bpf_insn_aux_data *aux = cur_aux(env);
14615 struct bpf_reg_state *regs = cur_regs(env);
14616 struct bpf_reg_state *dst_reg;
14617 struct bpf_map *map;
14620 if (BPF_SIZE(insn->code) != BPF_DW) {
14621 verbose(env, "invalid BPF_LD_IMM insn\n");
14624 if (insn->off != 0) {
14625 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
14629 err = check_reg_arg(env, insn->dst_reg, DST_OP);
14633 dst_reg = &regs[insn->dst_reg];
14634 if (insn->src_reg == 0) {
14635 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
14637 dst_reg->type = SCALAR_VALUE;
14638 __mark_reg_known(&regs[insn->dst_reg], imm);
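/* Illustrative sketch (assumed encoding example, not from this file): a
 * BPF_LD_IMM64 loading 0x1122334455667788 into r2 occupies two insn slots:
 * the first carries the low 32 bits (0x55667788) in insn->imm, the second
 * carries the high 32 bits (0x11223344) in (insn + 1)->imm, which is exactly
 * how 'imm' is reassembled above.
 */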
14642 /* All special src_reg cases are listed below. From this point onwards
14643 * we either succeed and assign a corresponding dst_reg->type after
14644 * zeroing the offset, or fail and reject the program.
14646 mark_reg_known_zero(env, regs, insn->dst_reg);
14648 if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
14649 dst_reg->type = aux->btf_var.reg_type;
14650 switch (base_type(dst_reg->type)) {
14652 dst_reg->mem_size = aux->btf_var.mem_size;
14654 case PTR_TO_BTF_ID:
14655 dst_reg->btf = aux->btf_var.btf;
14656 dst_reg->btf_id = aux->btf_var.btf_id;
14659 verbose(env, "bpf verifier is misconfigured\n");
14665 if (insn->src_reg == BPF_PSEUDO_FUNC) {
14666 struct bpf_prog_aux *aux = env->prog->aux;
14667 u32 subprogno = find_subprog(env,
14668 env->insn_idx + insn->imm + 1);
14670 if (!aux->func_info) {
14671 verbose(env, "missing btf func_info\n");
14674 if (aux->func_info_aux[subprogno].linkage != BTF_FUNC_STATIC) {
14675 verbose(env, "callback function not static\n");
14679 dst_reg->type = PTR_TO_FUNC;
14680 dst_reg->subprogno = subprogno;
14684 map = env->used_maps[aux->map_index];
14685 dst_reg->map_ptr = map;
14687 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
14688 insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
14689 dst_reg->type = PTR_TO_MAP_VALUE;
14690 dst_reg->off = aux->map_off;
14691 WARN_ON_ONCE(map->max_entries != 1);
14692 /* We want reg->id to be same (0) as map_value is not distinct */
14693 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD ||
14694 insn->src_reg == BPF_PSEUDO_MAP_IDX) {
14695 dst_reg->type = CONST_PTR_TO_MAP;
14697 verbose(env, "bpf verifier is misconfigured\n");
14704 static bool may_access_skb(enum bpf_prog_type type)
14707 case BPF_PROG_TYPE_SOCKET_FILTER:
14708 case BPF_PROG_TYPE_SCHED_CLS:
14709 case BPF_PROG_TYPE_SCHED_ACT:
14716 /* verify safety of LD_ABS|LD_IND instructions:
14717 * - they can only appear in the programs where ctx == skb
14718 * - since they are wrappers of function calls, they scratch R1-R5 registers,
14719 * preserve R6-R9, and store return value into R0
14722 * ctx == skb == R6 == CTX
14725 * SRC == any register
14726 * IMM == 32-bit immediate
14729 * R0 - 8/16/32-bit skb data converted to cpu endianness
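/* Illustrative sketch (assumed example, not from this file): a classic
 * socket filter fragment using these instructions is
 *
 *   BPF_LD_ABS(BPF_H, 12),                         - R0 = EtherType at skb offset 12
 *   BPF_JMP_IMM(BPF_JNE, BPF_REG_0, ETH_P_IP, ...),
 *
 * R6 must already hold the skb context, and R1-R5 are clobbered by the
 * load, exactly as described above.
 */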
14731 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
14733 struct bpf_reg_state *regs = cur_regs(env);
14734 static const int ctx_reg = BPF_REG_6;
14735 u8 mode = BPF_MODE(insn->code);
14738 if (!may_access_skb(resolve_prog_type(env->prog))) {
14739 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
14743 if (!env->ops->gen_ld_abs) {
14744 verbose(env, "bpf verifier is misconfigured\n");
14748 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
14749 BPF_SIZE(insn->code) == BPF_DW ||
14750 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
14751 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
14755 /* check whether implicit source operand (register R6) is readable */
14756 err = check_reg_arg(env, ctx_reg, SRC_OP);
14760 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
14761 * gen_ld_abs() may terminate the program at runtime, leading to
14764 err = check_reference_leak(env);
14766 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
14770 if (env->cur_state->active_lock.ptr) {
14771 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
14775 if (env->cur_state->active_rcu_lock) {
14776 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_rcu_read_lock-ed region\n");
14780 if (regs[ctx_reg].type != PTR_TO_CTX) {
14782 "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
14786 if (mode == BPF_IND) {
14787 /* check explicit source operand */
14788 err = check_reg_arg(env, insn->src_reg, SRC_OP);
14793 err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
14797 /* reset caller saved regs to unreadable */
14798 for (i = 0; i < CALLER_SAVED_REGS; i++) {
14799 mark_reg_not_init(env, regs, caller_saved[i]);
14800 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
14803 /* mark destination R0 register as readable, since it contains
14804 * the value fetched from the packet.
14805 * Already marked as written above.
14807 mark_reg_unknown(env, regs, BPF_REG_0);
14808 /* ld_abs loads up to 32 bits of skb data. */
14809 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
14813 static int check_return_code(struct bpf_verifier_env *env)
14815 struct tnum enforce_attach_type_range = tnum_unknown;
14816 const struct bpf_prog *prog = env->prog;
14817 struct bpf_reg_state *reg;
14818 struct tnum range = tnum_range(0, 1), const_0 = tnum_const(0);
14819 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
14821 struct bpf_func_state *frame = env->cur_state->frame[0];
14822 const bool is_subprog = frame->subprogno;
14824 /* LSM and struct_ops func-ptr's return type could be "void" */
14826 switch (prog_type) {
14827 case BPF_PROG_TYPE_LSM:
14828 if (prog->expected_attach_type == BPF_LSM_CGROUP)
14829 /* See below, can be 0 or 0-1 depending on hook. */
14832 case BPF_PROG_TYPE_STRUCT_OPS:
14833 if (!prog->aux->attach_func_proto->type)
14841 /* eBPF calling convention is such that R0 is used
14842 * to return the value from eBPF program.
14843 * Make sure that it's readable at this time
14844 * of bpf_exit, which means that program wrote
14845 * something into it earlier
14847 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
14851 if (is_pointer_value(env, BPF_REG_0)) {
14852 verbose(env, "R0 leaks addr as return value\n");
14856 reg = cur_regs(env) + BPF_REG_0;
14858 if (frame->in_async_callback_fn) {
14859 /* enforce return zero from async callbacks like timer */
14860 if (reg->type != SCALAR_VALUE) {
14861 verbose(env, "In async callback the register R0 is not a known value (%s)\n",
14862 reg_type_str(env, reg->type));
14866 if (!tnum_in(const_0, reg->var_off)) {
14867 verbose_invalid_scalar(env, reg, &const_0, "async callback", "R0");
14874 if (reg->type != SCALAR_VALUE) {
14875 verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
14876 reg_type_str(env, reg->type));
14882 switch (prog_type) {
14883 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
14884 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
14885 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
14886 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
14887 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
14888 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
14889 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
14890 range = tnum_range(1, 1);
14891 if (env->prog->expected_attach_type == BPF_CGROUP_INET4_BIND ||
14892 env->prog->expected_attach_type == BPF_CGROUP_INET6_BIND)
14893 range = tnum_range(0, 3);
14895 case BPF_PROG_TYPE_CGROUP_SKB:
14896 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
14897 range = tnum_range(0, 3);
14898 enforce_attach_type_range = tnum_range(2, 3);
14901 case BPF_PROG_TYPE_CGROUP_SOCK:
14902 case BPF_PROG_TYPE_SOCK_OPS:
14903 case BPF_PROG_TYPE_CGROUP_DEVICE:
14904 case BPF_PROG_TYPE_CGROUP_SYSCTL:
14905 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
14907 case BPF_PROG_TYPE_RAW_TRACEPOINT:
14908 if (!env->prog->aux->attach_btf_id)
14910 range = tnum_const(0);
14912 case BPF_PROG_TYPE_TRACING:
14913 switch (env->prog->expected_attach_type) {
14914 case BPF_TRACE_FENTRY:
14915 case BPF_TRACE_FEXIT:
14916 range = tnum_const(0);
14918 case BPF_TRACE_RAW_TP:
14919 case BPF_MODIFY_RETURN:
14921 case BPF_TRACE_ITER:
14927 case BPF_PROG_TYPE_SK_LOOKUP:
14928 range = tnum_range(SK_DROP, SK_PASS);
14931 case BPF_PROG_TYPE_LSM:
14932 if (env->prog->expected_attach_type != BPF_LSM_CGROUP) {
14933 /* Regular BPF_PROG_TYPE_LSM programs can return
14938 if (!env->prog->aux->attach_func_proto->type) {
14939 /* Make sure programs that attach to void
14940 * hooks don't try to modify return value.
14942 range = tnum_range(1, 1);
14946 case BPF_PROG_TYPE_NETFILTER:
14947 range = tnum_range(NF_DROP, NF_ACCEPT);
14949 case BPF_PROG_TYPE_EXT:
14950 /* freplace program can return anything as its return value
14951 * depends on the to-be-replaced kernel func or bpf program.
14957 if (reg->type != SCALAR_VALUE) {
14958 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
14959 reg_type_str(env, reg->type));
14963 if (!tnum_in(range, reg->var_off)) {
14964 verbose_invalid_scalar(env, reg, &range, "program exit", "R0");
14965 if (prog->expected_attach_type == BPF_LSM_CGROUP &&
14966 prog_type == BPF_PROG_TYPE_LSM &&
14967 !prog->aux->attach_func_proto->type)
14968 verbose(env, "Note, BPF_LSM_CGROUP that attach to void LSM hooks can't modify return value!\n");
14972 if (!tnum_is_unknown(enforce_attach_type_range) &&
14973 tnum_in(enforce_attach_type_range, reg->var_off))
14974 env->prog->enforce_expected_attach_type = 1;
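/* Illustrative sketch (assumed example, not from this file): a
 * BPF_PROG_TYPE_CGROUP_SKB program loaded with expected_attach_type
 * BPF_CGROUP_INET_EGRESS may return 0..3, but if its return value is
 * provably 2 or 3 the check above sets enforce_expected_attach_type, so the
 * program can later only be attached at egress, where those values are
 * meaningful, and not at ingress.
 */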
14978 /* non-recursive DFS pseudo code
14979 * 1 procedure DFS-iterative(G,v):
14980 * 2 label v as discovered
14981 * 3 let S be a stack
14983 * 5 while S is not empty
14985 * 7 if t is what we're looking for:
14987 * 9 for all edges e in G.adjacentEdges(t) do
14988 * 10 if edge e is already labelled
14989 * 11 continue with the next edge
14990 * 12 w <- G.adjacentVertex(t,e)
14991 * 13 if vertex w is not discovered and not explored
14992 * 14 label e as tree-edge
14993 * 15 label w as discovered
14996 * 18 else if vertex w is discovered
14997 * 19 label e as back-edge
14999 * 21 // vertex w is explored
15000 * 22 label e as forward- or cross-edge
15001 * 23 label t as explored
15005 * 0x10 - discovered
15006 * 0x11 - discovered and fall-through edge labelled
15007 * 0x12 - discovered and fall-through and branch edges labelled
15018 static void mark_prune_point(struct bpf_verifier_env *env, int idx)
15020 env->insn_aux_data[idx].prune_point = true;
15023 static bool is_prune_point(struct bpf_verifier_env *env, int insn_idx)
15025 return env->insn_aux_data[insn_idx].prune_point;
15028 static void mark_force_checkpoint(struct bpf_verifier_env *env, int idx)
15030 env->insn_aux_data[idx].force_checkpoint = true;
15033 static bool is_force_checkpoint(struct bpf_verifier_env *env, int insn_idx)
15035 return env->insn_aux_data[insn_idx].force_checkpoint;
15040 DONE_EXPLORING = 0,
15041 KEEP_EXPLORING = 1,
15044 /* t, w, e - match pseudo-code above:
15045 * t - index of current instruction
15046 * w - next instruction
15049 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
15051 int *insn_stack = env->cfg.insn_stack;
15052 int *insn_state = env->cfg.insn_state;
15054 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
15055 return DONE_EXPLORING;
15057 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
15058 return DONE_EXPLORING;
15060 if (w < 0 || w >= env->prog->len) {
15061 verbose_linfo(env, t, "%d: ", t);
15062 verbose(env, "jump out of range from insn %d to %d\n", t, w);
15067 /* mark branch target for state pruning */
15068 mark_prune_point(env, w);
15069 mark_jmp_point(env, w);
15072 if (insn_state[w] == 0) {
15074 insn_state[t] = DISCOVERED | e;
15075 insn_state[w] = DISCOVERED;
15076 if (env->cfg.cur_stack >= env->prog->len)
15078 insn_stack[env->cfg.cur_stack++] = w;
15079 return KEEP_EXPLORING;
15080 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
15081 if (env->bpf_capable)
15082 return DONE_EXPLORING;
15083 verbose_linfo(env, t, "%d: ", t);
15084 verbose_linfo(env, w, "%d: ", w);
15085 verbose(env, "back-edge from insn %d to %d\n", t, w);
15087 } else if (insn_state[w] == EXPLORED) {
15088 /* forward- or cross-edge */
15089 insn_state[t] = DISCOVERED | e;
15091 verbose(env, "insn state internal bug\n");
15094 return DONE_EXPLORING;
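/* Illustrative sketch (assumed program, not from this file): for
 *
 *   0: r1 = 0
 *   1: r1 += 1
 *   2: if r1 < 10 goto -2      - jumps back to insn 1
 *   3: exit
 *
 * the DFS reaches insn 2 while insn 1 is still DISCOVERED (on the stack), so
 * the branch edge 2 -> 1 is classified as a back-edge above; unprivileged
 * programs are rejected at that point, while bpf_capable ones are allowed to
 * proceed and have the loop bounded later during state exploration.
 */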
15097 static int visit_func_call_insn(int t, struct bpf_insn *insns,
15098 struct bpf_verifier_env *env,
15103 insn_sz = bpf_is_ldimm64(&insns[t]) ? 2 : 1;
15104 ret = push_insn(t, t + insn_sz, FALLTHROUGH, env);
15108 mark_prune_point(env, t + insn_sz);
15109 /* when we exit from subprog, we need to record non-linear history */
15110 mark_jmp_point(env, t + insn_sz);
15112 if (visit_callee) {
15113 mark_prune_point(env, t);
15114 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
15119 /* Visits the instruction at index t and returns one of the following:
15120 * < 0 - an error occurred
15121 * DONE_EXPLORING - the instruction was fully explored
15122 * KEEP_EXPLORING - there is still work to be done before it is fully explored
15124 static int visit_insn(int t, struct bpf_verifier_env *env)
15126 struct bpf_insn *insns = env->prog->insnsi, *insn = &insns[t];
15127 int ret, off, insn_sz;
15129 if (bpf_pseudo_func(insn))
15130 return visit_func_call_insn(t, insns, env, true);
15132 /* All non-branch instructions have a single fall-through edge. */
15133 if (BPF_CLASS(insn->code) != BPF_JMP &&
15134 BPF_CLASS(insn->code) != BPF_JMP32) {
15135 insn_sz = bpf_is_ldimm64(insn) ? 2 : 1;
15136 return push_insn(t, t + insn_sz, FALLTHROUGH, env);
15139 switch (BPF_OP(insn->code)) {
15141 return DONE_EXPLORING;
15144 if (insn->src_reg == 0 && insn->imm == BPF_FUNC_timer_set_callback)
15145 /* Mark this call insn as a prune point to trigger
15146 * is_state_visited() check before call itself is
15147 * processed by __check_func_call(). Otherwise new
15148 * async state will be pushed for further exploration.
15150 mark_prune_point(env, t);
15151 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
15152 struct bpf_kfunc_call_arg_meta meta;
15154 ret = fetch_kfunc_meta(env, insn, &meta, NULL);
15155 if (ret == 0 && is_iter_next_kfunc(&meta)) {
15156 mark_prune_point(env, t);
15157 /* Checking and saving state checkpoints at iter_next() call
15158 * is crucial for fast convergence of open-coded iterator loop
15159 * logic, so we need to force it. If we don't do that,
15160 * is_state_visited() might skip saving a checkpoint, causing
15161 * unnecessarily long sequence of not checkpointed
15162 * instructions and jumps, leading to exhaustion of jump
15163 * history buffer, and potentially other undesired outcomes.
15164 * It is expected that with correct open-coded iterators
15165 * convergence will happen quickly, so we don't run a risk of
15166 * exhausting memory.
15168 mark_force_checkpoint(env, t);
15171 return visit_func_call_insn(t, insns, env, insn->src_reg == BPF_PSEUDO_CALL);
15174 if (BPF_SRC(insn->code) != BPF_K)
15177 if (BPF_CLASS(insn->code) == BPF_JMP)
15182 /* unconditional jump with single edge */
15183 ret = push_insn(t, t + off + 1, FALLTHROUGH, env);
15187 mark_prune_point(env, t + off + 1);
15188 mark_jmp_point(env, t + off + 1);
15193 /* conditional jump with two edges */
15194 mark_prune_point(env, t);
15196 ret = push_insn(t, t + 1, FALLTHROUGH, env);
15200 return push_insn(t, t + insn->off + 1, BRANCH, env);
15204 /* non-recursive depth-first-search to detect loops in BPF program
15205 * loop == back-edge in directed graph
15207 static int check_cfg(struct bpf_verifier_env *env)
15209 int insn_cnt = env->prog->len;
15210 int *insn_stack, *insn_state;
15214 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
15218 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
15220 kvfree(insn_state);
15224 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
15225 insn_stack[0] = 0; /* 0 is the first instruction */
15226 env->cfg.cur_stack = 1;
15228 while (env->cfg.cur_stack > 0) {
15229 int t = insn_stack[env->cfg.cur_stack - 1];
15231 ret = visit_insn(t, env);
15233 case DONE_EXPLORING:
15234 insn_state[t] = EXPLORED;
15235 env->cfg.cur_stack--;
15237 case KEEP_EXPLORING:
15241 verbose(env, "visit_insn internal bug\n");
15248 if (env->cfg.cur_stack < 0) {
15249 verbose(env, "pop stack internal bug\n");
15254 for (i = 0; i < insn_cnt; i++) {
15255 struct bpf_insn *insn = &env->prog->insnsi[i];
15257 if (insn_state[i] != EXPLORED) {
15258 verbose(env, "unreachable insn %d\n", i);
15262 if (bpf_is_ldimm64(insn)) {
15263 if (insn_state[i + 1] != 0) {
15264 verbose(env, "jump into the middle of ldimm64 insn %d\n", i);
15268 i++; /* skip second half of ldimm64 */
15271 ret = 0; /* cfg looks good */
15274 kvfree(insn_state);
15275 kvfree(insn_stack);
15276 env->cfg.insn_state = env->cfg.insn_stack = NULL;
15280 static int check_abnormal_return(struct bpf_verifier_env *env)
15284 for (i = 1; i < env->subprog_cnt; i++) {
15285 if (env->subprog_info[i].has_ld_abs) {
15286 verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
15289 if (env->subprog_info[i].has_tail_call) {
15290 verbose(env, "tail_call is not allowed in subprogs without BTF\n");
15297 /* The minimum supported BTF func info size */
15298 #define MIN_BPF_FUNCINFO_SIZE 8
15299 #define MAX_FUNCINFO_REC_SIZE 252
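/* Illustrative sketch (assumed sizes, not from this file): if a newer
 * userspace passes func_info records of 16 bytes while the kernel's
 * struct bpf_func_info is 8 bytes, the extra bytes of each record must be
 * zero (checked by bpf_check_uarg_tail_zero() below); if userspace records
 * were smaller than the kernel struct, only min_size bytes would be copied
 * and the rest of the zero-initialized kernel record would stay zero.
 */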
15301 static int check_btf_func(struct bpf_verifier_env *env,
15302 const union bpf_attr *attr,
15305 const struct btf_type *type, *func_proto, *ret_type;
15306 u32 i, nfuncs, urec_size, min_size;
15307 u32 krec_size = sizeof(struct bpf_func_info);
15308 struct bpf_func_info *krecord;
15309 struct bpf_func_info_aux *info_aux = NULL;
15310 struct bpf_prog *prog;
15311 const struct btf *btf;
15313 u32 prev_offset = 0;
15314 bool scalar_return;
15317 nfuncs = attr->func_info_cnt;
15319 if (check_abnormal_return(env))
15324 if (nfuncs != env->subprog_cnt) {
15325 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
15329 urec_size = attr->func_info_rec_size;
15330 if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
15331 urec_size > MAX_FUNCINFO_REC_SIZE ||
15332 urec_size % sizeof(u32)) {
15333 verbose(env, "invalid func info rec size %u\n", urec_size);
15338 btf = prog->aux->btf;
15340 urecord = make_bpfptr(attr->func_info, uattr.is_kernel);
15341 min_size = min_t(u32, krec_size, urec_size);
15343 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
15346 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
15350 for (i = 0; i < nfuncs; i++) {
15351 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
15353 if (ret == -E2BIG) {
15354 verbose(env, "nonzero trailing record in func info");
15355 /* set the size kernel expects so loader can zero
15356 * out the rest of the record.
15358 if (copy_to_bpfptr_offset(uattr,
15359 offsetof(union bpf_attr, func_info_rec_size),
15360 &min_size, sizeof(min_size)))
15366 if (copy_from_bpfptr(&krecord[i], urecord, min_size)) {
15371 /* check insn_off */
15374 if (krecord[i].insn_off) {
15376 "nonzero insn_off %u for the first func info record",
15377 krecord[i].insn_off);
15380 } else if (krecord[i].insn_off <= prev_offset) {
15382 "same or smaller insn offset (%u) than previous func info record (%u)",
15383 krecord[i].insn_off, prev_offset);
15387 if (env->subprog_info[i].start != krecord[i].insn_off) {
15388 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
15392 /* check type_id */
15393 type = btf_type_by_id(btf, krecord[i].type_id);
15394 if (!type || !btf_type_is_func(type)) {
15395 verbose(env, "invalid type id %d in func info",
15396 krecord[i].type_id);
15399 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
15401 func_proto = btf_type_by_id(btf, type->type);
15402 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
15403 /* btf_func_check() already verified it during BTF load */
15405 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
15407 btf_type_is_small_int(ret_type) || btf_is_any_enum(ret_type);
15408 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
15409 verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
15412 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
15413 verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
15417 prev_offset = krecord[i].insn_off;
15418 bpfptr_add(&urecord, urec_size);
15421 prog->aux->func_info = krecord;
15422 prog->aux->func_info_cnt = nfuncs;
15423 prog->aux->func_info_aux = info_aux;
15432 static void adjust_btf_func(struct bpf_verifier_env *env)
15434 struct bpf_prog_aux *aux = env->prog->aux;
15437 if (!aux->func_info)
15440 for (i = 0; i < env->subprog_cnt; i++)
15441 aux->func_info[i].insn_off = env->subprog_info[i].start;
15444 #define MIN_BPF_LINEINFO_SIZE offsetofend(struct bpf_line_info, line_col)
15445 #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
15447 static int check_btf_line(struct bpf_verifier_env *env,
15448 const union bpf_attr *attr,
15451 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
15452 struct bpf_subprog_info *sub;
15453 struct bpf_line_info *linfo;
15454 struct bpf_prog *prog;
15455 const struct btf *btf;
15459 nr_linfo = attr->line_info_cnt;
15462 if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
15465 rec_size = attr->line_info_rec_size;
15466 if (rec_size < MIN_BPF_LINEINFO_SIZE ||
15467 rec_size > MAX_LINEINFO_REC_SIZE ||
15468 rec_size & (sizeof(u32) - 1))
15471 /* Need to zero it in case the userspace may
15472 * pass in a smaller bpf_line_info object.
15474 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
15475 GFP_KERNEL | __GFP_NOWARN);
15480 btf = prog->aux->btf;
15483 sub = env->subprog_info;
15484 ulinfo = make_bpfptr(attr->line_info, uattr.is_kernel);
15485 expected_size = sizeof(struct bpf_line_info);
15486 ncopy = min_t(u32, expected_size, rec_size);
15487 for (i = 0; i < nr_linfo; i++) {
15488 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
15490 if (err == -E2BIG) {
15491 verbose(env, "nonzero trailing record in line_info");
15492 if (copy_to_bpfptr_offset(uattr,
15493 offsetof(union bpf_attr, line_info_rec_size),
15494 &expected_size, sizeof(expected_size)))
15500 if (copy_from_bpfptr(&linfo[i], ulinfo, ncopy)) {
15506 * Check insn_off to ensure
15507 * 1) strictly increasing AND
15508 * 2) bounded by prog->len
15510 * The linfo[0].insn_off == 0 check logically falls into
15511 * the later "missing bpf_line_info for func..." case
15512 * because the first linfo[0].insn_off must also be the
15513 * start of the first subprog, and the first subprog must
15514 * have subprog_info[0].start == 0.
15516 if ((i && linfo[i].insn_off <= prev_offset) ||
15517 linfo[i].insn_off >= prog->len) {
15518 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
15519 i, linfo[i].insn_off, prev_offset,
15525 if (!prog->insnsi[linfo[i].insn_off].code) {
15527 "Invalid insn code at line_info[%u].insn_off\n",
15533 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
15534 !btf_name_by_offset(btf, linfo[i].file_name_off)) {
15535 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
15540 if (s != env->subprog_cnt) {
15541 if (linfo[i].insn_off == sub[s].start) {
15542 sub[s].linfo_idx = i;
15544 } else if (sub[s].start < linfo[i].insn_off) {
15545 verbose(env, "missing bpf_line_info for func#%u\n", s);
15551 prev_offset = linfo[i].insn_off;
15552 bpfptr_add(&ulinfo, rec_size);
15555 if (s != env->subprog_cnt) {
15556 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
15557 env->subprog_cnt - s, s);
15562 prog->aux->linfo = linfo;
15563 prog->aux->nr_linfo = nr_linfo;
15572 #define MIN_CORE_RELO_SIZE sizeof(struct bpf_core_relo)
15573 #define MAX_CORE_RELO_SIZE MAX_FUNCINFO_REC_SIZE
15575 static int check_core_relo(struct bpf_verifier_env *env,
15576 const union bpf_attr *attr,
15579 u32 i, nr_core_relo, ncopy, expected_size, rec_size;
15580 struct bpf_core_relo core_relo = {};
15581 struct bpf_prog *prog = env->prog;
15582 const struct btf *btf = prog->aux->btf;
15583 struct bpf_core_ctx ctx = {
15587 bpfptr_t u_core_relo;
15590 nr_core_relo = attr->core_relo_cnt;
15593 if (nr_core_relo > INT_MAX / sizeof(struct bpf_core_relo))
15596 rec_size = attr->core_relo_rec_size;
15597 if (rec_size < MIN_CORE_RELO_SIZE ||
15598 rec_size > MAX_CORE_RELO_SIZE ||
15599 rec_size % sizeof(u32))
15602 u_core_relo = make_bpfptr(attr->core_relos, uattr.is_kernel);
15603 expected_size = sizeof(struct bpf_core_relo);
15604 ncopy = min_t(u32, expected_size, rec_size);
15606 /* Unlike func_info and line_info, copy and apply each CO-RE
15607 * relocation record one at a time.
15609 for (i = 0; i < nr_core_relo; i++) {
15610 /* future proofing when sizeof(bpf_core_relo) changes */
15611 err = bpf_check_uarg_tail_zero(u_core_relo, expected_size, rec_size);
15613 if (err == -E2BIG) {
15614 verbose(env, "nonzero trailing record in core_relo");
15615 if (copy_to_bpfptr_offset(uattr,
15616 offsetof(union bpf_attr, core_relo_rec_size),
15617 &expected_size, sizeof(expected_size)))
15623 if (copy_from_bpfptr(&core_relo, u_core_relo, ncopy)) {
15628 if (core_relo.insn_off % 8 || core_relo.insn_off / 8 >= prog->len) {
15629 verbose(env, "Invalid core_relo[%u].insn_off:%u prog->len:%u\n",
15630 i, core_relo.insn_off, prog->len);
15635 err = bpf_core_apply(&ctx, &core_relo, i,
15636 &prog->insnsi[core_relo.insn_off / 8]);
15639 bpfptr_add(&u_core_relo, rec_size);
15644 static int check_btf_info(struct bpf_verifier_env *env,
15645 const union bpf_attr *attr,
15651 if (!attr->func_info_cnt && !attr->line_info_cnt) {
15652 if (check_abnormal_return(env))
15657 btf = btf_get_by_fd(attr->prog_btf_fd);
15659 return PTR_ERR(btf);
15660 if (btf_is_kernel(btf)) {
15664 env->prog->aux->btf = btf;
15666 err = check_btf_func(env, attr, uattr);
15670 err = check_btf_line(env, attr, uattr);
15674 err = check_core_relo(env, attr, uattr);
15681 /* check %cur's range satisfies %old's */
15682 static bool range_within(struct bpf_reg_state *old,
15683 struct bpf_reg_state *cur)
15685 return old->umin_value <= cur->umin_value &&
15686 old->umax_value >= cur->umax_value &&
15687 old->smin_value <= cur->smin_value &&
15688 old->smax_value >= cur->smax_value &&
15689 old->u32_min_value <= cur->u32_min_value &&
15690 old->u32_max_value >= cur->u32_max_value &&
15691 old->s32_min_value <= cur->s32_min_value &&
15692 old->s32_max_value >= cur->s32_max_value;
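/* Illustrative sketch (assumed values, not from this file): with an old
 * (already verified) register known to be in [0, 100] and a current one in
 * [10, 50], range_within(old, cur) is true, since every value the current
 * state allows was already proven safe; a current range of [10, 200] would
 * fail the umax/smax checks and force further exploration.
 */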
15695 /* If in the old state two registers had the same id, then they need to have
15696 * the same id in the new state as well. But that id could be different from
15697 * the old state, so we need to track the mapping from old to new ids.
15698 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
15699 * regs with old id 5 must also have new id 9 for the new state to be safe. But
15700 * regs with a different old id could still have new id 9, we don't care about
15702 * So we look through our idmap to see if this old id has been seen before. If
15703 * so, we require the new id to match; otherwise, we add the id pair to the map.
15705 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
15707 struct bpf_id_pair *map = idmap->map;
15710 /* either both IDs should be set or both should be zero */
15711 if (!!old_id != !!cur_id)
15714 if (old_id == 0) /* cur_id == 0 as well */
15717 for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
15719 /* Reached an empty slot; haven't seen this id before */
15720 map[i].old = old_id;
15721 map[i].cur = cur_id;
15724 if (map[i].old == old_id)
15725 return map[i].cur == cur_id;
15726 if (map[i].cur == cur_id)
15729 /* We ran out of idmap slots, which should be impossible */
15734 /* Similar to check_ids(), but allocate a unique temporary ID
15735 * for 'old_id' or 'cur_id' of zero.
15736 * This makes pairs like '0 vs unique ID', 'unique ID vs 0' valid.
15738 static bool check_scalar_ids(u32 old_id, u32 cur_id, struct bpf_idmap *idmap)
15740 old_id = old_id ? old_id : ++idmap->tmp_id_gen;
15741 cur_id = cur_id ? cur_id : ++idmap->tmp_id_gen;
15743 return check_ids(old_id, cur_id, idmap);
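/* Illustrative sketch (assumed ids, not from this file): if the old state
 * had r1.id == r2.id == 5 and the current state has r1.id == r2.id == 9,
 * the first check_ids(5, 9, idmap) call records the 5 -> 9 mapping and the
 * second call succeeds because it sees the same pair again; a current state
 * with r1.id == 9 but r2.id == 7 fails, since old id 5 cannot map to two
 * different current ids.
 */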
15746 static void clean_func_state(struct bpf_verifier_env *env,
15747 struct bpf_func_state *st)
15749 enum bpf_reg_liveness live;
15752 for (i = 0; i < BPF_REG_FP; i++) {
15753 live = st->regs[i].live;
15754 /* liveness must not touch this register anymore */
15755 st->regs[i].live |= REG_LIVE_DONE;
15756 if (!(live & REG_LIVE_READ))
15757 /* since the register is unused, clear its state
15758 * to make further comparison simpler
15760 __mark_reg_not_init(env, &st->regs[i]);
15763 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
15764 live = st->stack[i].spilled_ptr.live;
15765 /* liveness must not touch this stack slot anymore */
15766 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
15767 if (!(live & REG_LIVE_READ)) {
15768 __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
15769 for (j = 0; j < BPF_REG_SIZE; j++)
15770 st->stack[i].slot_type[j] = STACK_INVALID;
15775 static void clean_verifier_state(struct bpf_verifier_env *env,
15776 struct bpf_verifier_state *st)
15780 if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
15781 /* all regs in this state in all frames were already marked */
15784 for (i = 0; i <= st->curframe; i++)
15785 clean_func_state(env, st->frame[i]);
15788 /* the parentage chains form a tree.
15789 * the verifier states are added to state lists at given insn and
15790 * pushed into state stack for future exploration.
15791 * when the verifier reaches bpf_exit insn some of the verifer states
15792 * stored in the state lists have their final liveness state already,
15793 * but a lot of states will get revised from liveness point of view when
15794 * the verifier explores other branches.
15797 * 2: if r1 == 100 goto pc+1
15800 * when the verifier reaches exit insn the register r0 in the state list of
15801 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
15802 * of insn 2 and goes exploring further. At the insn 4 it will walk the
15803 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
15805 * Since the verifier pushes the branch states as it sees them while exploring
15806 * the program the condition of walking the branch instruction for the second
15807 * time means that all states below this branch were already explored and
15808 * their final liveness marks are already propagated.
15809 * Hence when the verifier completes the search of state list in is_state_visited()
15810 * we can call this clean_live_states() function to mark all liveness states
15811 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
15812 * will not be used.
15813 * This function also clears the registers and stack for states that !READ
15814 * to simplify state merging.
15816 * An important note here: walking the same branch instruction in the callee
15817 * doesn't mean that the states are DONE. The verifier has to compare the states.
15820 static void clean_live_states(struct bpf_verifier_env *env, int insn,
15821 struct bpf_verifier_state *cur)
15823 struct bpf_verifier_state_list *sl;
15825 sl = *explored_state(env, insn);
15827 if (sl->state.branches)
15829 if (sl->state.insn_idx != insn ||
15830 !same_callsites(&sl->state, cur))
15832 clean_verifier_state(env, &sl->state);
15838 static bool regs_exact(const struct bpf_reg_state *rold,
15839 const struct bpf_reg_state *rcur,
15840 struct bpf_idmap *idmap)
15842 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
15843 check_ids(rold->id, rcur->id, idmap) &&
15844 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
15847 /* Returns true if (rold safe implies rcur safe) */
15848 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
15849 struct bpf_reg_state *rcur, struct bpf_idmap *idmap, bool exact)
15852 return regs_exact(rold, rcur, idmap);
15854 if (!(rold->live & REG_LIVE_READ))
15855 /* explored state didn't use this */
15857 if (rold->type == NOT_INIT)
15858 /* explored state can't have used this */
15860 if (rcur->type == NOT_INIT)
15863 /* Enforce that register types have to match exactly, including their
15864 * modifiers (like PTR_MAYBE_NULL, MEM_RDONLY, etc), as a general
15867 * One can make a point that using a pointer register as unbounded
15868 * SCALAR would be technically acceptable, but this could lead to
15869 * pointer leaks because scalars are allowed to leak while pointers
15870 * are not. We could make this safe in special cases if root is
15871 * calling us, but it's probably not worth the hassle.
15873 * Also, register types that are *not* MAYBE_NULL could technically be
15874 * safe to use as their MAYBE_NULL variants (e.g., PTR_TO_MAP_VALUE
15875 * is safe to be used as PTR_TO_MAP_VALUE_OR_NULL, provided both point
15876 * to the same map).
15877 * However, if the old MAYBE_NULL register then got NULL checked,
15878 * doing so could have affected others with the same id, and we can't
15879 * check for that because we lost the id when we converted to
15880 * a non-MAYBE_NULL variant.
15881 * So, as a general rule we don't allow mixing MAYBE_NULL and
15882 * non-MAYBE_NULL registers as well.
15884 if (rold->type != rcur->type)
15887 switch (base_type(rold->type)) {
15889 if (env->explore_alu_limits) {
15890 /* explore_alu_limits disables tnum_in() and range_within()
15891 * logic and requires everything to be strict
15893 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
15894 check_scalar_ids(rold->id, rcur->id, idmap);
15896 if (!rold->precise)
15898 /* Why check_ids() for scalar registers?
15900 * Consider the following BPF code:
15901 * 1: r6 = ... unbound scalar, ID=a ...
15902 * 2: r7 = ... unbound scalar, ID=b ...
15903 * 3: if (r6 > r7) goto +1
15905 * 5: if (r6 > X) goto ...
15906 * 6: ... memory operation using r7 ...
15908 * First verification path is [1-6]:
15909 * - at (4) same bpf_reg_state::id (b) would be assigned to r6 and r7;
15910 * - at (5) r6 would be marked <= X, find_equal_scalars() would also mark
15911 * r7 <= X, because r6 and r7 share same id.
15912 * Next verification path is [1-4, 6].
15914 * Instruction (6) would be reached in two states:
15915 * I. r6{.id=b}, r7{.id=b} via path 1-6;
15916 * II. r6{.id=a}, r7{.id=b} via path 1-4, 6.
15918 * Use check_ids() to distinguish these states.
15920 * Also verify that new value satisfies old value range knowledge.
15922 return range_within(rold, rcur) &&
15923 tnum_in(rold->var_off, rcur->var_off) &&
15924 check_scalar_ids(rold->id, rcur->id, idmap);
15925 case PTR_TO_MAP_KEY:
15926 case PTR_TO_MAP_VALUE:
15929 case PTR_TO_TP_BUFFER:
15930 /* If the new min/max/var_off satisfy the old ones and
15931 * everything else matches, we are OK.
15933 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, var_off)) == 0 &&
15934 range_within(rold, rcur) &&
15935 tnum_in(rold->var_off, rcur->var_off) &&
15936 check_ids(rold->id, rcur->id, idmap) &&
15937 check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
15938 case PTR_TO_PACKET_META:
15939 case PTR_TO_PACKET:
15940 /* We must have at least as much range as the old ptr
15941 * did, so that any accesses which were safe before are
15942 * still safe. This is true even if old range < old off,
15943 * since someone could have accessed through (ptr - k), or
15944 * even done ptr -= k in a register, to get a safe access.
15946 if (rold->range > rcur->range)
15948 /* If the offsets don't match, we can't trust our alignment;
15949 * nor can we be sure that we won't fall out of range.
15951 if (rold->off != rcur->off)
15953 /* id relations must be preserved */
15954 if (!check_ids(rold->id, rcur->id, idmap))
15956 /* new val must satisfy old val knowledge */
15957 return range_within(rold, rcur) &&
15958 tnum_in(rold->var_off, rcur->var_off);
15960 /* two stack pointers are equal only if they're pointing to
15961 * the same stack frame, since fp-8 in foo != fp-8 in bar
15963 return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
15965 return regs_exact(rold, rcur, idmap);
15969 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
15970 struct bpf_func_state *cur, struct bpf_idmap *idmap, bool exact)
15974 /* walk slots of the explored stack and ignore any additional
15975 * slots in the current stack, since explored(safe) state
15978 for (i = 0; i < old->allocated_stack; i++) {
15979 struct bpf_reg_state *old_reg, *cur_reg;
15981 spi = i / BPF_REG_SIZE;
15984 old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
15985 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
15988 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) && !exact) {
15989 i += BPF_REG_SIZE - 1;
15990 /* explored state didn't use this */
15994 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
15997 if (env->allow_uninit_stack &&
15998 old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC)
16001 /* explored stack has more populated slots than current stack
16002 * and these slots were used
16004 if (i >= cur->allocated_stack)
16007 /* if old state was safe with misc data in the stack
16008 * it will be safe with zero-initialized stack.
16009 * The opposite is not true
16011 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
16012 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
16014 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
16015 cur->stack[spi].slot_type[i % BPF_REG_SIZE])
16016 /* Ex: old explored (safe) state has STACK_SPILL in
16017 * this stack slot, but current has STACK_MISC ->
16018 * these verifier states are not equivalent,
16019 * return false to continue verification of this path
16022 if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
16024 /* Both old and cur have the same slot_type */
16025 switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
16027 /* when explored and current stack slot are both storing
16028 * spilled registers, check that stored pointers types
16029 * are the same as well.
16030 * Ex: explored safe path could have stored
16031 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
16032 * but current path has stored:
16033 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
16034 * such verifier states are not equivalent.
16035 * return false to continue verification of this path
16037 if (!regsafe(env, &old->stack[spi].spilled_ptr,
16038 &cur->stack[spi].spilled_ptr, idmap, exact))
16042 old_reg = &old->stack[spi].spilled_ptr;
16043 cur_reg = &cur->stack[spi].spilled_ptr;
16044 if (old_reg->dynptr.type != cur_reg->dynptr.type ||
16045 old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
16046 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
16050 old_reg = &old->stack[spi].spilled_ptr;
16051 cur_reg = &cur->stack[spi].spilled_ptr;
16052 /* iter.depth is not compared between states as it
16053 * doesn't matter for correctness and would otherwise
16054 * prevent convergence; we maintain it only to prevent
16055 * infinite loop check triggering, see
16056 * iter_active_depths_differ()
16058 if (old_reg->iter.btf != cur_reg->iter.btf ||
16059 old_reg->iter.btf_id != cur_reg->iter.btf_id ||
16060 old_reg->iter.state != cur_reg->iter.state ||
16061 /* ignore {old_reg,cur_reg}->iter.depth, see above */
16062 !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
16067 case STACK_INVALID:
16069 /* Ensure that new unhandled slot types return false by default */
16077 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur,
16078 struct bpf_idmap *idmap)
16082 if (old->acquired_refs != cur->acquired_refs)
16085 for (i = 0; i < old->acquired_refs; i++) {
16086 if (!check_ids(old->refs[i].id, cur->refs[i].id, idmap))
16093 /* compare two verifier states
16095 * all states stored in state_list are known to be valid, since
16096 * verifier reached 'bpf_exit' instruction through them
16098 * this function is called while the verifier explores different branches of
16099 * execution popped from the state stack. If it sees an old state that has
16100 * more strict register state and more strict stack state then this execution
16101 * branch doesn't need to be explored further, since verifier already
16102 * concluded that more strict state leads to valid finish.
16104 * Therefore two states are equivalent if register state is more conservative
16105 * and explored stack state is more conservative than the current one.
16108 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
16109 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
16111 * In other words if current stack state (one being explored) has more
16112 * valid slots than old one that already passed validation, it means
16113 * the verifier can stop exploring and conclude that current state is valid too
16115 * Similarly with registers. If explored state has register type as invalid
16116 * whereas register type in current state is meaningful, it means that
16117 * the current state will reach 'bpf_exit' instruction safely
16119 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
16120 struct bpf_func_state *cur, bool exact)
16124 for (i = 0; i < MAX_BPF_REG; i++)
16125 if (!regsafe(env, &old->regs[i], &cur->regs[i],
16126 &env->idmap_scratch, exact))
16129 if (!stacksafe(env, old, cur, &env->idmap_scratch, exact))
16132 if (!refsafe(old, cur, &env->idmap_scratch))
16138 static void reset_idmap_scratch(struct bpf_verifier_env *env)
16140 env->idmap_scratch.tmp_id_gen = env->id_gen;
16141 memset(&env->idmap_scratch.map, 0, sizeof(env->idmap_scratch.map));
16144 static bool states_equal(struct bpf_verifier_env *env,
16145 struct bpf_verifier_state *old,
16146 struct bpf_verifier_state *cur,
16151 if (old->curframe != cur->curframe)
16154 reset_idmap_scratch(env);
16156 /* Verification state from speculative execution simulation
16157 * must never prune a non-speculative execution one.
16159 if (old->speculative && !cur->speculative)
16162 if (old->active_lock.ptr != cur->active_lock.ptr)
16165 /* Old and cur active_lock's have to be either both present
16168 if (!!old->active_lock.id != !!cur->active_lock.id)
16171 if (old->active_lock.id &&
16172 !check_ids(old->active_lock.id, cur->active_lock.id, &env->idmap_scratch))
16175 if (old->active_rcu_lock != cur->active_rcu_lock)
16178 /* for states to be equal callsites have to be the same
16179 * and all frame states need to be equivalent
16181 for (i = 0; i <= old->curframe; i++) {
16182 if (old->frame[i]->callsite != cur->frame[i]->callsite)
16184 if (!func_states_equal(env, old->frame[i], cur->frame[i], exact))
16190 /* Return 0 if no propagation happened. Return negative error code if error
16191 * happened. Otherwise, return the propagated bit.
16193 static int propagate_liveness_reg(struct bpf_verifier_env *env,
16194 struct bpf_reg_state *reg,
16195 struct bpf_reg_state *parent_reg)
16197 u8 parent_flag = parent_reg->live & REG_LIVE_READ;
16198 u8 flag = reg->live & REG_LIVE_READ;
16201 /* When comes here, read flags of PARENT_REG or REG could be any of
16202 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
16203 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
16205 if (parent_flag == REG_LIVE_READ64 ||
16206 /* Or if there is no read flag from REG. */
16208 /* Or if the read flag from REG is the same as PARENT_REG. */
16209 parent_flag == flag)
16212 err = mark_reg_read(env, reg, parent_reg, flag);
16219 /* A write screens off any subsequent reads; but write marks come from the
16220 * straight-line code between a state and its parent. When we arrive at an
16221 * equivalent state (jump target or such) we didn't arrive by the straight-line
16222 * code, so read marks in the state must propagate to the parent regardless
16223 * of the state's write marks. That's what 'parent == state->parent' comparison
16224 * in mark_reg_read() is for.
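/*
 * Editor's illustrative sketch (not verifier code): write marks only screen
 * off reads along the straight-line path from the parent checkpoint:
 *
 *   0: <parent checkpoint>
 *   1: r6 = 0                    ; REG_LIVE_WRITTEN on r6 in the child state
 *   2: r0 = r6                   ; read satisfied locally, no mark upward
 *
 * When we instead jump into an equivalent state, there is no straight-line
 * write between the two states, so the state's read marks must still be
 * copied to the parent, which is what propagate_liveness() does.
 */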
16226 static int propagate_liveness(struct bpf_verifier_env *env,
16227 const struct bpf_verifier_state *vstate,
16228 struct bpf_verifier_state *vparent)
16230 struct bpf_reg_state *state_reg, *parent_reg;
16231 struct bpf_func_state *state, *parent;
16232 int i, frame, err = 0;
16234 if (vparent->curframe != vstate->curframe) {
16235 WARN(1, "propagate_live: parent frame %d current frame %d\n",
16236 vparent->curframe, vstate->curframe);
16239 /* Propagate read liveness of registers... */
16240 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
16241 for (frame = 0; frame <= vstate->curframe; frame++) {
16242 parent = vparent->frame[frame];
16243 state = vstate->frame[frame];
16244 parent_reg = parent->regs;
16245 state_reg = state->regs;
16246 /* We don't need to worry about FP liveness, it's read-only */
16247 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
16248 err = propagate_liveness_reg(env, &state_reg[i],
16252 if (err == REG_LIVE_READ64)
16253 mark_insn_zext(env, &parent_reg[i]);
16256 /* Propagate stack slots. */
16257 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
16258 i < parent->allocated_stack / BPF_REG_SIZE; i++) {
16259 parent_reg = &parent->stack[i].spilled_ptr;
16260 state_reg = &state->stack[i].spilled_ptr;
16261 err = propagate_liveness_reg(env, state_reg,
16270 /* find precise scalars in the previous equivalent state and
16271 * propagate them into the current state
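/*
 * Editor's illustrative sketch (hypothetical program, not verifier code):
 * if the already-verified equivalent state had to mark r2 precise, e.g.
 * because its exact value fed pointer arithmetic that was bounds-checked,
 *
 *   5: r1 += r2                  ; r2's concrete value mattered here
 *   6: r0 = *(u8 *)(r1 + 0)      ; access proven safe only for that value
 *
 * then the current state being pruned must inherit that precision mark, so
 * that backtracking through the current state's history does not leave r2
 * imprecise.
 */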
16273 static int propagate_precision(struct bpf_verifier_env *env,
16274 const struct bpf_verifier_state *old)
16276 struct bpf_reg_state *state_reg;
16277 struct bpf_func_state *state;
16278 int i, err = 0, fr;
16281 for (fr = old->curframe; fr >= 0; fr--) {
16282 state = old->frame[fr];
16283 state_reg = state->regs;
16285 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
16286 if (state_reg->type != SCALAR_VALUE ||
16287 !state_reg->precise ||
16288 !(state_reg->live & REG_LIVE_READ))
16290 if (env->log.level & BPF_LOG_LEVEL2) {
16292 verbose(env, "frame %d: propagating r%d", fr, i);
16294 verbose(env, ",r%d", i);
16296 bt_set_frame_reg(&env->bt, fr, i);
16300 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
16301 if (!is_spilled_reg(&state->stack[i]))
16303 state_reg = &state->stack[i].spilled_ptr;
16304 if (state_reg->type != SCALAR_VALUE ||
16305 !state_reg->precise ||
16306 !(state_reg->live & REG_LIVE_READ))
16308 if (env->log.level & BPF_LOG_LEVEL2) {
16310 verbose(env, "frame %d: propagating fp%d",
16311 fr, (-i - 1) * BPF_REG_SIZE);
16313 verbose(env, ",fp%d", (-i - 1) * BPF_REG_SIZE);
16315 bt_set_frame_slot(&env->bt, fr, i);
16319 verbose(env, "\n");
16322 err = mark_chain_precision_batch(env);
16329 static bool states_maybe_looping(struct bpf_verifier_state *old,
16330 struct bpf_verifier_state *cur)
16332 struct bpf_func_state *fold, *fcur;
16333 int i, fr = cur->curframe;
16335 if (old->curframe != fr)
16338 fold = old->frame[fr];
16339 fcur = cur->frame[fr];
16340 for (i = 0; i < MAX_BPF_REG; i++)
16341 if (memcmp(&fold->regs[i], &fcur->regs[i],
16342 offsetof(struct bpf_reg_state, parent)))
16347 static bool is_iter_next_insn(struct bpf_verifier_env *env, int insn_idx)
16349 return env->insn_aux_data[insn_idx].is_iter_next;
16352 /* is_state_visited() handles iter_next() (see process_iter_next_call() for
16353 * terminology) calls specially: as opposed to bounded BPF loops, it *expects*
16354 * states to match, which otherwise would look like an infinite loop. So while
16355 * iter_next() calls are taken care of, we still need to be careful and
16356 * prevent erroneous and too eager declaration of "infinite loop", when
16357 * iterators are involved.
16359 * Here's a situation in pseudo-BPF assembly form:
16361 * 0: again: ; set up iter_next() call args
16362 * 1: r1 = &it ; <CHECKPOINT HERE>
16363 * 2: call bpf_iter_num_next ; this is iter_next() call
16364 * 3: if r0 == 0 goto done
16365 * 4: ... something useful here ...
16366 * 5: goto again ; another iteration
16369 * 8: call bpf_iter_num_destroy ; clean up iter state
16372 * This is a typical loop. Let's assume that we have a prune point at 1:,
16373 * before we get to `call bpf_iter_num_next` (e.g., because of that `goto
16374 * again`, assuming other heuristics don't get in the way).
16376 * When we come to 1: for the first time, let's say we have some state X. We proceed
16377 * to 2:, fork states, enqueue ACTIVE, validate NULL case successfully, exit.
16378 * Now we come back to validate that forked ACTIVE state. We proceed through
16379 * 3-5, come to goto, jump to 1:. Let's assume our state didn't change, so we
16380 * are converging. But the problem is that we don't know that yet, as this
16381 * convergence has to happen at iter_next() call site only. So if nothing is
16382 * done, at 1: verifier will use bounded loop logic and declare infinite
16383 * looping (and would be *technically* correct, if not for iterator's
16384 * "eventual sticky NULL" contract, see process_iter_next_call()). But we
16385 * don't want that. So what we do in process_iter_next_call(), when we go on
16386 * another ACTIVE iteration, is bump slot->iter.depth to mark that it's
16387 * a different iteration. So when we suspect an infinite loop, we additionally
16388 * check if any of the *ACTIVE* iterator states depths differ. If yes, we
16389 * pretend we are not looping and wait for next iter_next() call.
16391 * This only applies to ACTIVE state. In DRAINED state we don't expect to
16392 * loop, because that would actually mean infinite loop, as DRAINED state is
16393 * "sticky", and so we'll keep returning into the same instruction with the
16394 * same state (at least in one of possible code paths).
16396 * This approach allows us to keep the infinite loop heuristic even in the
16397 * face of an active iterator. E.g., the C snippet below is detected as
16398 * infinitely looping:
16400 * struct bpf_iter_num it;
16403 * bpf_iter_num_new(&it, 0, 10);
16404 * while ((p = bpf_iter_num_next(&it))) {
16406 * while (x--) {} // <<-- infinite loop here
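/*
 * Editor's worked example (illustrative): two ACTIVE iterator states meeting
 * at the checkpoint with slot->iter.depth of 1 vs 2 are treated as different
 * iterations, so iter_active_depths_differ() returns true and the "infinite
 * loop" verdict is deferred until the next iter_next() call site, where real
 * convergence can be established.
 */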
16410 static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
16412 struct bpf_reg_state *slot, *cur_slot;
16413 struct bpf_func_state *state;
16416 for (fr = old->curframe; fr >= 0; fr--) {
16417 state = old->frame[fr];
16418 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
16419 if (state->stack[i].slot_type[0] != STACK_ITER)
16422 slot = &state->stack[i].spilled_ptr;
16423 if (slot->iter.state != BPF_ITER_STATE_ACTIVE)
16426 cur_slot = &cur->frame[fr]->stack[i].spilled_ptr;
16427 if (cur_slot->iter.depth != slot->iter.depth)
16434 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
16436 struct bpf_verifier_state_list *new_sl;
16437 struct bpf_verifier_state_list *sl, **pprev;
16438 struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
16439 int i, j, n, err, states_cnt = 0;
16440 bool force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx);
16441 bool add_new_state = force_new_state;
16444 /* bpf progs typically have pruning point every 4 instructions
16445 * http://vger.kernel.org/bpfconf2019.html#session-1
16446 * Do not add new state for future pruning if the verifier hasn't seen
16447 * at least 2 jumps and at least 8 instructions.
16448 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
16449 * In tests this amounts to up to a 50% reduction in total verifier
16450 * memory consumption and a 20% verifier time speedup.
16452 if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
16453 env->insn_processed - env->prev_insn_processed >= 8)
16454 add_new_state = true;
16456 pprev = explored_state(env, insn_idx);
16459 clean_live_states(env, insn_idx, cur);
16463 if (sl->state.insn_idx != insn_idx)
16466 if (sl->state.branches) {
16467 struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
16469 if (frame->in_async_callback_fn &&
16470 frame->async_entry_cnt != cur->frame[cur->curframe]->async_entry_cnt) {
16471 /* Different async_entry_cnt means that the verifier is
16472 * processing another entry into async callback.
16473 * Seeing the same state is not an indication of infinite
16474 * loop or infinite recursion.
16475 * But finding the same state doesn't mean that it's safe
16476 * to stop processing the current state. The previous state
16477 * hasn't yet reached bpf_exit, since state.branches > 0.
16478 * Checking in_async_callback_fn alone is not enough either,
16479 * since the verifier still needs to catch infinite loops
16480 * inside async callbacks.
16482 goto skip_inf_loop_check;
16484 /* BPF open-coded iterators loop detection is special.
16485 * states_maybe_looping() logic is too simplistic in detecting
16486 * states that *might* be equivalent, because it doesn't know
16487 * about ID remapping, so don't even perform it.
16488 * See process_iter_next_call() and iter_active_depths_differ()
16489 * for overview of the logic. When current and one of parent
16490 * states are detected as equivalent, it's a good thing: we prove
16491 * convergence and can stop simulating further iterations.
16492 * It's safe to assume that iterator loop will finish, taking into
16493 * account iter_next() contract of eventually returning
16494 * sticky NULL result.
16496 * Note that states have to be compared exactly in this case because
16497 * read and precision marks might not be finalized inside the loop.
16498 * E.g. as in the program below:
16501 * 2. r6 = bpf_get_prandom_u32()
16502 * 3. while (bpf_iter_num_next(&fp[-8])) {
16503 * 4. if (r6 != 42) {
16505 * 6. r6 = bpf_get_prandom_u32()
16510 * 11. r8 = *(u64 *)(r0 + 0)
16511 * 12. r6 = bpf_get_prandom_u32()
16514 * Here verifier would first visit path 1-3, create a checkpoint at 3
16515 * with r7=-16, continue to 4-7,3. Existing checkpoint at 3 does
16516 * not have read or precision mark for r7 yet, thus inexact states
16517 * comparison would discard current state with r7=-32
16518 * => unsafe memory access at 11 would not be caught.
16520 if (is_iter_next_insn(env, insn_idx)) {
16521 if (states_equal(env, &sl->state, cur, true)) {
16522 struct bpf_func_state *cur_frame;
16523 struct bpf_reg_state *iter_state, *iter_reg;
16526 cur_frame = cur->frame[cur->curframe];
16527 /* btf_check_iter_kfuncs() enforces that
16528 * iter state pointer is always the first arg
16530 iter_reg = &cur_frame->regs[BPF_REG_1];
16531 /* current state is valid due to states_equal(),
16532 * so we can assume valid iter and reg state,
16533 * no need for extra (re-)validations
16535 spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
16536 iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
16537 if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
16538 update_loop_entry(cur, &sl->state);
16542 goto skip_inf_loop_check;
16544 /* attempt to detect infinite loop to avoid unnecessary doomed work */
16545 if (states_maybe_looping(&sl->state, cur) &&
16546 states_equal(env, &sl->state, cur, false) &&
16547 !iter_active_depths_differ(&sl->state, cur)) {
16548 verbose_linfo(env, insn_idx, "; ");
16549 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
16550 verbose(env, "cur state:");
16551 print_verifier_state(env, cur->frame[cur->curframe], true);
16552 verbose(env, "old state:");
16553 print_verifier_state(env, sl->state.frame[cur->curframe], true);
16556 /* if the verifier is processing a loop, avoid adding new state
16557 * too often, since different loop iterations have distinct
16558 * states and may not help future pruning.
16559 * This threshold shouldn't be too low to make sure that
16560 * a loop with large bound will be rejected quickly.
16561 * The most abusive loop will be:
16563 * if r1 < 1000000 goto pc-2
16564 * 1M insn_processed limit / 100 == 10k peak states.
16565 * This threshold shouldn't be too high either, since states
16566 * at the end of the loop are likely to be useful in pruning.
16568 skip_inf_loop_check:
16569 if (!force_new_state &&
16570 env->jmps_processed - env->prev_jmps_processed < 20 &&
16571 env->insn_processed - env->prev_insn_processed < 100)
16572 add_new_state = false;
16575 /* If sl->state is a part of a loop and this loop's entry is a part of
16576 * current verification path then states have to be compared exactly.
16577 * 'force_exact' is needed to catch the following case:
16579 * initial Here state 'succ' was processed first,
16580 * | it was eventually tracked to produce a
16581 * V state identical to 'hdr'.
16582 * .---------> hdr All branches from 'succ' had been explored
16583 * | | and thus 'succ' has its .branches == 0.
16585 * | .------... Suppose states 'cur' and 'succ' correspond
16586 * | | | to the same instruction + callsites.
16587 * | V V In such case it is necessary to check
16588 * | ... ... if 'succ' and 'cur' are states_equal().
16589 * | | | If 'succ' and 'cur' are a part of the
16590 * | V V same loop exact flag has to be set.
16591 * | succ <- cur To check if that is the case, verify
16592 * | | if loop entry of 'succ' is in current
16598 * Additional details are in the comment before get_loop_entry().
16600 loop_entry = get_loop_entry(&sl->state);
16601 force_exact = loop_entry && loop_entry->branches > 0;
16602 if (states_equal(env, &sl->state, cur, force_exact)) {
16604 update_loop_entry(cur, loop_entry);
16607 /* reached equivalent register/stack state,
16608 * prune the search.
16609 * Registers read by the continuation are read by us.
16610 * If we have any write marks in env->cur_state, they
16611 * will prevent corresponding reads in the continuation
16612 * from reaching our parent (an explored_state). Our
16613 * own state will get the read marks recorded, but
16614 * they'll be immediately forgotten as we're pruning
16615 * this state and will pop a new one.
16617 err = propagate_liveness(env, &sl->state, cur);
16619 /* if previous state reached the exit with precision and
16620 * current state is equivalent to it (except precision marks)
16621 * the precision needs to be propagated back in
16622 * the current state.
16624 err = err ? : push_jmp_history(env, cur);
16625 err = err ? : propagate_precision(env, &sl->state);
16631 /* when a new state is not going to be added, do not increase the miss count.
16632 * Otherwise several loop iterations will remove the state
16633 * recorded earlier. The goal of these heuristics is to have
16634 * states from some iterations of the loop (some in the beginning
16635 * and some at the end) to help pruning.
16639 /* heuristic to determine whether this state is beneficial
16640 * to keep checking from state equivalence point of view.
16641 * Higher numbers increase max_states_per_insn and verification time,
16642 * but do not meaningfully decrease insn_processed.
16643 * 'n' controls how many times state could miss before eviction.
16644 * Use bigger 'n' for checkpoints because evicting checkpoint states
16645 * too early would hinder iterator convergence.
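/*
 * Editor's worked example (illustrative): with n = 3 and hit_cnt = 0 a state
 * is evicted on its 4th miss (4 > 0 * 3 + 3); a checkpoint state with n = 64
 * and hit_cnt = 1 survives until miss_cnt exceeds 128.
 */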
16647 n = is_force_checkpoint(env, insn_idx) && sl->state.branches > 0 ? 64 : 3;
16648 if (sl->miss_cnt > sl->hit_cnt * n + n) {
16649 /* the state is unlikely to be useful. Remove it to
16650 * speed up verification
16653 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
16654 !sl->state.used_as_loop_entry) {
16655 u32 br = sl->state.branches;
16658 "BUG live_done but branches_to_explore %d\n",
16660 free_verifier_state(&sl->state, false);
16662 env->peak_states--;
16664 /* cannot free this state, since parentage chain may
16665 * walk it later. Add it to the free_list instead, to
16666 * be freed at the end of verification
16668 sl->next = env->free_list;
16669 env->free_list = sl;
16679 if (env->max_states_per_insn < states_cnt)
16680 env->max_states_per_insn = states_cnt;
16682 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
16685 if (!add_new_state)
16688 /* There were no equivalent states, remember the current one.
16689 * Technically the current state is not proven to be safe yet,
16690 * but it will either reach the outermost bpf_exit (which means it's safe)
16691 * or it will be rejected. When there are no loops the verifier won't be
16692 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
16693 * again on the way to bpf_exit.
16694 * When looping the sl->state.branches will be > 0 and this state
16695 * will not be considered for equivalence until branches == 0.
16697 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
16700 env->total_states++;
16701 env->peak_states++;
16702 env->prev_jmps_processed = env->jmps_processed;
16703 env->prev_insn_processed = env->insn_processed;
16705 /* forget precise markings we inherited, see __mark_chain_precision */
16706 if (env->bpf_capable)
16707 mark_all_scalars_imprecise(env, cur);
16709 /* add new state to the head of linked list */
16710 new = &new_sl->state;
16711 err = copy_verifier_state(new, cur);
16713 free_verifier_state(new, false);
16717 new->insn_idx = insn_idx;
16718 WARN_ONCE(new->branches != 1,
16719 "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
16722 cur->first_insn_idx = insn_idx;
16723 cur->dfs_depth = new->dfs_depth + 1;
16724 clear_jmp_history(cur);
16725 new_sl->next = *explored_state(env, insn_idx);
16726 *explored_state(env, insn_idx) = new_sl;
16727 /* connect new state to parentage chain. Current frame needs all
16728 * registers connected. Only r6 - r9 of the callers are alive (pushed
16729 * to the stack implicitly by JITs) so in callers' frames connect just
16730 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
16731 * the state of the call instruction (with WRITTEN set), and r0 comes
16732 * from callee with its full parentage chain, anyway.
16734 /* clear write marks in current state: the writes we did are not writes
16735 * our child did, so they don't screen off its reads from us.
16736 * (There are no read marks in current state, because reads always mark
16737 * their parent and current state never has children yet. Only
16738 * explored_states can get read marks.)
16740 for (j = 0; j <= cur->curframe; j++) {
16741 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
16742 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
16743 for (i = 0; i < BPF_REG_FP; i++)
16744 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
16747 /* all stack frames are accessible from callee, clear them all */
16748 for (j = 0; j <= cur->curframe; j++) {
16749 struct bpf_func_state *frame = cur->frame[j];
16750 struct bpf_func_state *newframe = new->frame[j];
16752 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
16753 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
16754 frame->stack[i].spilled_ptr.parent =
16755 &newframe->stack[i].spilled_ptr;
16761 /* Return true if it's OK to have the same insn return a different type. */
16762 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
16764 switch (base_type(type)) {
16766 case PTR_TO_SOCKET:
16767 case PTR_TO_SOCK_COMMON:
16768 case PTR_TO_TCP_SOCK:
16769 case PTR_TO_XDP_SOCK:
16770 case PTR_TO_BTF_ID:
16777 /* If an instruction was previously used with particular pointer types, then we
16778 * need to be careful to avoid cases such as the below, where it may be ok
16779 * for one branch accessing the pointer, but not ok for the other branch:
16784 * R1 = some_other_valid_ptr;
16787 * R2 = *(u32 *)(R1 + 0);
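/*
 * Editor's illustrative sketch (hypothetical C-level shape of the problem):
 *
 *   if (cond)
 *           R1 = ctx_ptr;        // PTR_TO_CTX in this branch
 *   else
 *           R1 = map_value_ptr;  // PTR_TO_MAP_VALUE in the other
 *   R2 = *(u32 *)(R1 + 0);       // one insn, two incompatible rewrites
 *
 * A ctx load is rewritten by convert_ctx_accesses() while a map value load
 * is not, so the same instruction cannot legally serve both branches.
 */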
16789 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
16791 return src != prev && (!reg_type_mismatch_ok(src) ||
16792 !reg_type_mismatch_ok(prev));
16795 static int save_aux_ptr_type(struct bpf_verifier_env *env, enum bpf_reg_type type,
16796 bool allow_trust_missmatch)
16798 enum bpf_reg_type *prev_type = &env->insn_aux_data[env->insn_idx].ptr_type;
16800 if (*prev_type == NOT_INIT) {
16801 /* Saw a valid insn
16802 * dst_reg = *(u32 *)(src_reg + off)
16803 * save type to validate intersecting paths
16806 } else if (reg_type_mismatch(type, *prev_type)) {
16807 /* Abuser program is trying to use the same insn
16808 * dst_reg = *(u32*) (src_reg + off)
16809 * with different pointer types:
16810 * src_reg == ctx in one branch and
16811 * src_reg == stack|map in some other branch.
16814 if (allow_trust_missmatch &&
16815 base_type(type) == PTR_TO_BTF_ID &&
16816 base_type(*prev_type) == PTR_TO_BTF_ID) {
16818 * Have to support a use case when one path through
16819 * the program yields TRUSTED pointer while another
16820 * is UNTRUSTED. Fallback to UNTRUSTED to generate
16821 * BPF_PROBE_MEM/BPF_PROBE_MEMSX.
16823 *prev_type = PTR_TO_BTF_ID | PTR_UNTRUSTED;
16825 verbose(env, "same insn cannot be used with different pointers\n");
16833 static int do_check(struct bpf_verifier_env *env)
16835 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
16836 struct bpf_verifier_state *state = env->cur_state;
16837 struct bpf_insn *insns = env->prog->insnsi;
16838 struct bpf_reg_state *regs;
16839 int insn_cnt = env->prog->len;
16840 bool do_print_state = false;
16841 int prev_insn_idx = -1;
16844 struct bpf_insn *insn;
16848 env->prev_insn_idx = prev_insn_idx;
16849 if (env->insn_idx >= insn_cnt) {
16850 verbose(env, "invalid insn idx %d insn_cnt %d\n",
16851 env->insn_idx, insn_cnt);
16855 insn = &insns[env->insn_idx];
16856 class = BPF_CLASS(insn->code);
16858 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
16860 "BPF program is too large. Processed %d insn\n",
16861 env->insn_processed);
16865 state->last_insn_idx = env->prev_insn_idx;
16867 if (is_prune_point(env, env->insn_idx)) {
16868 err = is_state_visited(env, env->insn_idx);
16872 /* found equivalent state, can prune the search */
16873 if (env->log.level & BPF_LOG_LEVEL) {
16874 if (do_print_state)
16875 verbose(env, "\nfrom %d to %d%s: safe\n",
16876 env->prev_insn_idx, env->insn_idx,
16877 env->cur_state->speculative ?
16878 " (speculative execution)" : "");
16880 verbose(env, "%d: safe\n", env->insn_idx);
16882 goto process_bpf_exit;
16886 if (is_jmp_point(env, env->insn_idx)) {
16887 err = push_jmp_history(env, state);
16892 if (signal_pending(current))
16895 if (need_resched())
16898 if (env->log.level & BPF_LOG_LEVEL2 && do_print_state) {
16899 verbose(env, "\nfrom %d to %d%s:",
16900 env->prev_insn_idx, env->insn_idx,
16901 env->cur_state->speculative ?
16902 " (speculative execution)" : "");
16903 print_verifier_state(env, state->frame[state->curframe], true);
16904 do_print_state = false;
16907 if (env->log.level & BPF_LOG_LEVEL) {
16908 const struct bpf_insn_cbs cbs = {
16909 .cb_call = disasm_kfunc_name,
16910 .cb_print = verbose,
16911 .private_data = env,
16914 if (verifier_state_scratched(env))
16915 print_insn_state(env, state->frame[state->curframe]);
16917 verbose_linfo(env, env->insn_idx, "; ");
16918 env->prev_log_pos = env->log.end_pos;
16919 verbose(env, "%d: ", env->insn_idx);
16920 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
16921 env->prev_insn_print_pos = env->log.end_pos - env->prev_log_pos;
16922 env->prev_log_pos = env->log.end_pos;
16925 if (bpf_prog_is_offloaded(env->prog->aux)) {
16926 err = bpf_prog_offload_verify_insn(env, env->insn_idx,
16927 env->prev_insn_idx);
16932 regs = cur_regs(env);
16933 sanitize_mark_insn_seen(env);
16934 prev_insn_idx = env->insn_idx;
16936 if (class == BPF_ALU || class == BPF_ALU64) {
16937 err = check_alu_op(env, insn);
16941 } else if (class == BPF_LDX) {
16942 enum bpf_reg_type src_reg_type;
16944 /* check for reserved fields is already done */
16946 /* check src operand */
16947 err = check_reg_arg(env, insn->src_reg, SRC_OP);
16951 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
16955 src_reg_type = regs[insn->src_reg].type;
16957 /* check that memory (src_reg + off) is readable,
16958 * the state of dst_reg will be updated by this func
16960 err = check_mem_access(env, env->insn_idx, insn->src_reg,
16961 insn->off, BPF_SIZE(insn->code),
16962 BPF_READ, insn->dst_reg, false,
16963 BPF_MODE(insn->code) == BPF_MEMSX);
16967 err = save_aux_ptr_type(env, src_reg_type, true);
16970 } else if (class == BPF_STX) {
16971 enum bpf_reg_type dst_reg_type;
16973 if (BPF_MODE(insn->code) == BPF_ATOMIC) {
16974 err = check_atomic(env, env->insn_idx, insn);
16981 if (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0) {
16982 verbose(env, "BPF_STX uses reserved fields\n");
16986 /* check src1 operand */
16987 err = check_reg_arg(env, insn->src_reg, SRC_OP);
16990 /* check src2 operand */
16991 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
16995 dst_reg_type = regs[insn->dst_reg].type;
16997 /* check that memory (dst_reg + off) is writeable */
16998 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
16999 insn->off, BPF_SIZE(insn->code),
17000 BPF_WRITE, insn->src_reg, false, false);
17004 err = save_aux_ptr_type(env, dst_reg_type, false);
17007 } else if (class == BPF_ST) {
17008 enum bpf_reg_type dst_reg_type;
17010 if (BPF_MODE(insn->code) != BPF_MEM ||
17011 insn->src_reg != BPF_REG_0) {
17012 verbose(env, "BPF_ST uses reserved fields\n");
17015 /* check src operand */
17016 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
17020 dst_reg_type = regs[insn->dst_reg].type;
17022 /* check that memory (dst_reg + off) is writeable */
17023 err = check_mem_access(env, env->insn_idx, insn->dst_reg,
17024 insn->off, BPF_SIZE(insn->code),
17025 BPF_WRITE, -1, false, false);
17029 err = save_aux_ptr_type(env, dst_reg_type, false);
17032 } else if (class == BPF_JMP || class == BPF_JMP32) {
17033 u8 opcode = BPF_OP(insn->code);
17035 env->jmps_processed++;
17036 if (opcode == BPF_CALL) {
17037 if (BPF_SRC(insn->code) != BPF_K ||
17038 (insn->src_reg != BPF_PSEUDO_KFUNC_CALL
17039 && insn->off != 0) ||
17040 (insn->src_reg != BPF_REG_0 &&
17041 insn->src_reg != BPF_PSEUDO_CALL &&
17042 insn->src_reg != BPF_PSEUDO_KFUNC_CALL) ||
17043 insn->dst_reg != BPF_REG_0 ||
17044 class == BPF_JMP32) {
17045 verbose(env, "BPF_CALL uses reserved fields\n");
17049 if (env->cur_state->active_lock.ptr) {
17050 if ((insn->src_reg == BPF_REG_0 && insn->imm != BPF_FUNC_spin_unlock) ||
17051 (insn->src_reg == BPF_PSEUDO_CALL) ||
17052 (insn->src_reg == BPF_PSEUDO_KFUNC_CALL &&
17053 (insn->off != 0 || !is_bpf_graph_api_kfunc(insn->imm)))) {
17054 verbose(env, "function calls are not allowed while holding a lock\n");
17058 if (insn->src_reg == BPF_PSEUDO_CALL)
17059 err = check_func_call(env, insn, &env->insn_idx);
17060 else if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL)
17061 err = check_kfunc_call(env, insn, &env->insn_idx);
17063 err = check_helper_call(env, insn, &env->insn_idx);
17067 mark_reg_scratched(env, BPF_REG_0);
17068 } else if (opcode == BPF_JA) {
17069 if (BPF_SRC(insn->code) != BPF_K ||
17070 insn->src_reg != BPF_REG_0 ||
17071 insn->dst_reg != BPF_REG_0 ||
17072 (class == BPF_JMP && insn->imm != 0) ||
17073 (class == BPF_JMP32 && insn->off != 0)) {
17074 verbose(env, "BPF_JA uses reserved fields\n");
17078 if (class == BPF_JMP)
17079 env->insn_idx += insn->off + 1;
17081 env->insn_idx += insn->imm + 1;
17084 } else if (opcode == BPF_EXIT) {
17085 if (BPF_SRC(insn->code) != BPF_K ||
17087 insn->src_reg != BPF_REG_0 ||
17088 insn->dst_reg != BPF_REG_0 ||
17089 class == BPF_JMP32) {
17090 verbose(env, "BPF_EXIT uses reserved fields\n");
17094 if (env->cur_state->active_lock.ptr &&
17095 !in_rbtree_lock_required_cb(env)) {
17096 verbose(env, "bpf_spin_unlock is missing\n");
17100 if (env->cur_state->active_rcu_lock &&
17101 !in_rbtree_lock_required_cb(env)) {
17102 verbose(env, "bpf_rcu_read_unlock is missing\n");
17106 /* We must do check_reference_leak here before
17107 * prepare_func_exit to handle the case when
17108 * state->curframe > 0: it may be a callback
17109 * function, for which reference_state must
17110 * match the caller's reference state when it exits.
17112 err = check_reference_leak(env);
17116 if (state->curframe) {
17117 /* exit from nested function */
17118 err = prepare_func_exit(env, &env->insn_idx);
17121 do_print_state = true;
17125 err = check_return_code(env);
17129 mark_verifier_state_scratched(env);
17130 update_branch_counts(env, env->cur_state);
17131 err = pop_stack(env, &prev_insn_idx,
17132 &env->insn_idx, pop_log);
17134 if (err != -ENOENT)
17138 do_print_state = true;
17142 err = check_cond_jmp_op(env, insn, &env->insn_idx);
17146 } else if (class == BPF_LD) {
17147 u8 mode = BPF_MODE(insn->code);
17149 if (mode == BPF_ABS || mode == BPF_IND) {
17150 err = check_ld_abs(env, insn);
17154 } else if (mode == BPF_IMM) {
17155 err = check_ld_imm(env, insn);
17160 sanitize_mark_insn_seen(env);
17162 verbose(env, "invalid BPF_LD mode\n");
17166 verbose(env, "unknown insn class %d\n", class);
17176 static int find_btf_percpu_datasec(struct btf *btf)
17178 const struct btf_type *t;
17183 * Both vmlinux and module each have their own ".data..percpu"
17184 * DATASECs in BTF. So for module's case, we need to skip vmlinux BTF
17185 * types to look at only module's own BTF types.
17187 n = btf_nr_types(btf);
17188 if (btf_is_module(btf))
17189 i = btf_nr_types(btf_vmlinux);
17193 for (; i < n; i++) {
17194 t = btf_type_by_id(btf, i);
17195 if (BTF_INFO_KIND(t->info) != BTF_KIND_DATASEC)
17198 tname = btf_name_by_offset(btf, t->name_off);
17199 if (!strcmp(tname, ".data..percpu"))
17206 /* replace pseudo btf_id with kernel symbol address */
17207 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
17208 struct bpf_insn *insn,
17209 struct bpf_insn_aux_data *aux)
17211 const struct btf_var_secinfo *vsi;
17212 const struct btf_type *datasec;
17213 struct btf_mod_pair *btf_mod;
17214 const struct btf_type *t;
17215 const char *sym_name;
17216 bool percpu = false;
17217 u32 type, id = insn->imm;
17221 int i, btf_fd, err;
17223 btf_fd = insn[1].imm;
17225 btf = btf_get_by_fd(btf_fd);
17227 verbose(env, "invalid module BTF object FD specified.\n");
17231 if (!btf_vmlinux) {
17232 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
17239 t = btf_type_by_id(btf, id);
17241 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
17246 if (!btf_type_is_var(t) && !btf_type_is_func(t)) {
17247 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR or KIND_FUNC\n", id);
17252 sym_name = btf_name_by_offset(btf, t->name_off);
17253 addr = kallsyms_lookup_name(sym_name);
17255 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
17260 insn[0].imm = (u32)addr;
17261 insn[1].imm = addr >> 32;
17263 if (btf_type_is_func(t)) {
17264 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17265 aux->btf_var.mem_size = 0;
17269 datasec_id = find_btf_percpu_datasec(btf);
17270 if (datasec_id > 0) {
17271 datasec = btf_type_by_id(btf, datasec_id);
17272 for_each_vsi(i, datasec, vsi) {
17273 if (vsi->type == id) {
17281 t = btf_type_skip_modifiers(btf, type, NULL);
17283 aux->btf_var.reg_type = PTR_TO_BTF_ID | MEM_PERCPU;
17284 aux->btf_var.btf = btf;
17285 aux->btf_var.btf_id = type;
17286 } else if (!btf_type_is_struct(t)) {
17287 const struct btf_type *ret;
17291 /* resolve the type size of ksym. */
17292 ret = btf_resolve_size(btf, t, &tsize);
17294 tname = btf_name_by_offset(btf, t->name_off);
17295 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
17296 tname, PTR_ERR(ret));
17300 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
17301 aux->btf_var.mem_size = tsize;
17303 aux->btf_var.reg_type = PTR_TO_BTF_ID;
17304 aux->btf_var.btf = btf;
17305 aux->btf_var.btf_id = type;
17308 /* check whether we recorded this BTF (and maybe module) already */
17309 for (i = 0; i < env->used_btf_cnt; i++) {
17310 if (env->used_btfs[i].btf == btf) {
17316 if (env->used_btf_cnt >= MAX_USED_BTFS) {
17321 btf_mod = &env->used_btfs[env->used_btf_cnt];
17322 btf_mod->btf = btf;
17323 btf_mod->module = NULL;
17325 /* if we reference variables from kernel module, bump its refcount */
17326 if (btf_is_module(btf)) {
17327 btf_mod->module = btf_try_get_module(btf);
17328 if (!btf_mod->module) {
17334 env->used_btf_cnt++;
17342 static bool is_tracing_prog_type(enum bpf_prog_type type)
17345 case BPF_PROG_TYPE_KPROBE:
17346 case BPF_PROG_TYPE_TRACEPOINT:
17347 case BPF_PROG_TYPE_PERF_EVENT:
17348 case BPF_PROG_TYPE_RAW_TRACEPOINT:
17349 case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
17356 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
17357 struct bpf_map *map,
17358 struct bpf_prog *prog)
17361 enum bpf_prog_type prog_type = resolve_prog_type(prog);
17363 if (btf_record_has_field(map->record, BPF_LIST_HEAD) ||
17364 btf_record_has_field(map->record, BPF_RB_ROOT)) {
17365 if (is_tracing_prog_type(prog_type)) {
17366 verbose(env, "tracing progs cannot use bpf_{list_head,rb_root} yet\n");
17371 if (btf_record_has_field(map->record, BPF_SPIN_LOCK)) {
17372 if (prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
17373 verbose(env, "socket filter progs cannot use bpf_spin_lock yet\n");
17377 if (is_tracing_prog_type(prog_type)) {
17378 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
17383 if (btf_record_has_field(map->record, BPF_TIMER)) {
17384 if (is_tracing_prog_type(prog_type)) {
17385 verbose(env, "tracing progs cannot use bpf_timer yet\n");
17390 if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
17391 !bpf_offload_prog_map_match(prog, map)) {
17392 verbose(env, "offload device mismatch between prog and map\n");
17396 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
17397 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
17401 if (prog->aux->sleepable)
17402 switch (map->map_type) {
17403 case BPF_MAP_TYPE_HASH:
17404 case BPF_MAP_TYPE_LRU_HASH:
17405 case BPF_MAP_TYPE_ARRAY:
17406 case BPF_MAP_TYPE_PERCPU_HASH:
17407 case BPF_MAP_TYPE_PERCPU_ARRAY:
17408 case BPF_MAP_TYPE_LRU_PERCPU_HASH:
17409 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
17410 case BPF_MAP_TYPE_HASH_OF_MAPS:
17411 case BPF_MAP_TYPE_RINGBUF:
17412 case BPF_MAP_TYPE_USER_RINGBUF:
17413 case BPF_MAP_TYPE_INODE_STORAGE:
17414 case BPF_MAP_TYPE_SK_STORAGE:
17415 case BPF_MAP_TYPE_TASK_STORAGE:
17416 case BPF_MAP_TYPE_CGRP_STORAGE:
17420 "Sleepable programs can only use array, hash, ringbuf and local storage maps\n");
17427 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
17429 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
17430 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
17433 /* find and rewrite pseudo imm in ld_imm64 instructions:
17435 * 1. if it accesses map FD, replace it with actual map pointer.
17436 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
17438 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
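/*
 * Editor's illustrative sketch (hypothetical fd value): a map reference as
 * emitted by a loader, which this pass rewrites in place:
 *
 *   BPF_LD_IMM64_RAW(BPF_REG_1, BPF_PSEUDO_MAP_FD, 5);  // user-space fd 5
 *
 * After resolve_pseudo_ldimm64() the two-insn pair instead carries the
 * lower/upper 32 bits of the kernel's struct bpf_map pointer in
 * insn[0].imm / insn[1].imm.
 */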
17440 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
17442 struct bpf_insn *insn = env->prog->insnsi;
17443 int insn_cnt = env->prog->len;
17446 err = bpf_prog_calc_tag(env->prog);
17450 for (i = 0; i < insn_cnt; i++, insn++) {
17451 if (BPF_CLASS(insn->code) == BPF_LDX &&
17452 ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_MEMSX) ||
17454 verbose(env, "BPF_LDX uses reserved fields\n");
17458 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
17459 struct bpf_insn_aux_data *aux;
17460 struct bpf_map *map;
17465 if (i == insn_cnt - 1 || insn[1].code != 0 ||
17466 insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
17467 insn[1].off != 0) {
17468 verbose(env, "invalid bpf_ld_imm64 insn\n");
17472 if (insn[0].src_reg == 0)
17473 /* valid generic load 64-bit imm */
17476 if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
17477 aux = &env->insn_aux_data[i];
17478 err = check_pseudo_btf_id(env, insn, aux);
17484 if (insn[0].src_reg == BPF_PSEUDO_FUNC) {
17485 aux = &env->insn_aux_data[i];
17486 aux->ptr_type = PTR_TO_FUNC;
17490 /* In final convert_pseudo_ld_imm64() step, this is
17491 * converted into regular 64-bit imm load insn.
17493 switch (insn[0].src_reg) {
17494 case BPF_PSEUDO_MAP_VALUE:
17495 case BPF_PSEUDO_MAP_IDX_VALUE:
17497 case BPF_PSEUDO_MAP_FD:
17498 case BPF_PSEUDO_MAP_IDX:
17499 if (insn[1].imm == 0)
17503 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
17507 switch (insn[0].src_reg) {
17508 case BPF_PSEUDO_MAP_IDX_VALUE:
17509 case BPF_PSEUDO_MAP_IDX:
17510 if (bpfptr_is_null(env->fd_array)) {
17511 verbose(env, "fd_idx without fd_array is invalid\n");
17514 if (copy_from_bpfptr_offset(&fd, env->fd_array,
17515 insn[0].imm * sizeof(fd),
17525 map = __bpf_map_get(f);
17527 verbose(env, "fd %d is not pointing to valid bpf_map\n",
17529 return PTR_ERR(map);
17532 err = check_map_prog_compatibility(env, map, env->prog);
17538 aux = &env->insn_aux_data[i];
17539 if (insn[0].src_reg == BPF_PSEUDO_MAP_FD ||
17540 insn[0].src_reg == BPF_PSEUDO_MAP_IDX) {
17541 addr = (unsigned long)map;
17543 u32 off = insn[1].imm;
17545 if (off >= BPF_MAX_VAR_OFF) {
17546 verbose(env, "direct value offset of %u is not allowed\n", off);
17551 if (!map->ops->map_direct_value_addr) {
17552 verbose(env, "no direct value access support for this map type\n");
17557 err = map->ops->map_direct_value_addr(map, &addr, off);
17559 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
17560 map->value_size, off);
17565 aux->map_off = off;
17569 insn[0].imm = (u32)addr;
17570 insn[1].imm = addr >> 32;
17572 /* check whether we recorded this map already */
17573 for (j = 0; j < env->used_map_cnt; j++) {
17574 if (env->used_maps[j] == map) {
17575 aux->map_index = j;
17581 if (env->used_map_cnt >= MAX_USED_MAPS) {
17586 /* hold the map. If the program is rejected by verifier,
17587 * the map will be released by release_maps() or it
17588 * will be used by the valid program until it's unloaded
17589 * and all maps are released in free_used_maps()
17593 aux->map_index = env->used_map_cnt;
17594 env->used_maps[env->used_map_cnt++] = map;
17596 if (bpf_map_is_cgroup_storage(map) &&
17597 bpf_cgroup_storage_assign(env->prog->aux, map)) {
17598 verbose(env, "only one cgroup storage of each type is allowed\n");
17610 /* Basic sanity check before we invest more work here. */
17611 if (!bpf_opcode_in_insntable(insn->code)) {
17612 verbose(env, "unknown opcode %02x\n", insn->code);
17617 /* now all pseudo BPF_LD_IMM64 instructions load valid
17618 * 'struct bpf_map *' into a register instead of user map_fd.
17619 * These pointers will be used later by verifier to validate map access.
17624 /* drop refcnt of maps used by the rejected program */
17625 static void release_maps(struct bpf_verifier_env *env)
17627 __bpf_free_used_maps(env->prog->aux, env->used_maps,
17628 env->used_map_cnt);
17631 /* drop refcnt of btfs used by the rejected program */
17632 static void release_btfs(struct bpf_verifier_env *env)
17634 __bpf_free_used_btfs(env->prog->aux, env->used_btfs,
17635 env->used_btf_cnt);
17638 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
17639 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
17641 struct bpf_insn *insn = env->prog->insnsi;
17642 int insn_cnt = env->prog->len;
17645 for (i = 0; i < insn_cnt; i++, insn++) {
17646 if (insn->code != (BPF_LD | BPF_IMM | BPF_DW))
17648 if (insn->src_reg == BPF_PSEUDO_FUNC)
17654 /* single env->prog->insnsi[off] instruction was replaced with the range
17655 * insnsi[off, off + cnt). Adjust corresponding insn_aux_data by copying
17656 * [0, off) and [off, end) to new locations, so the patched range stays zeroed
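/*
 * Editor's worked example (illustrative numbers): replacing insn 3 of a
 * 6-insn program with a 3-insn patch yields an 8-insn program; aux[0..2] are
 * copied unchanged, the old aux[3..5] land at new aux[5..7], and the freshly
 * inserted slots aux[3..4] inherit the old 'seen' count below.
 */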
17658 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
17659 struct bpf_insn_aux_data *new_data,
17660 struct bpf_prog *new_prog, u32 off, u32 cnt)
17662 struct bpf_insn_aux_data *old_data = env->insn_aux_data;
17663 struct bpf_insn *insn = new_prog->insnsi;
17664 u32 old_seen = old_data[off].seen;
17668 /* aux info at OFF always needs adjustment, no matter whether the fast path
17669 * (cnt == 1) is taken or not. There is no guarantee that INSN at OFF is the
17670 * original insn of the old prog.
17672 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
17676 prog_len = new_prog->len;
17678 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
17679 memcpy(new_data + off + cnt - 1, old_data + off,
17680 sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
17681 for (i = off; i < off + cnt - 1; i++) {
17682 /* Expand insni[off]'s seen count to the patched range. */
17683 new_data[i].seen = old_seen;
17684 new_data[i].zext_dst = insn_has_def32(env, insn + i);
17686 env->insn_aux_data = new_data;
17690 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
17696 /* NOTE: fake 'exit' subprog should be updated as well. */
17697 for (i = 0; i <= env->subprog_cnt; i++) {
17698 if (env->subprog_info[i].start <= off)
17700 env->subprog_info[i].start += len - 1;
17704 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
17706 struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
17707 int i, sz = prog->aux->size_poke_tab;
17708 struct bpf_jit_poke_descriptor *desc;
17710 for (i = 0; i < sz; i++) {
17712 if (desc->insn_idx <= off)
17714 desc->insn_idx += len - 1;
17718 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
17719 const struct bpf_insn *patch, u32 len)
17721 struct bpf_prog *new_prog;
17722 struct bpf_insn_aux_data *new_data = NULL;
17725 new_data = vzalloc(array_size(env->prog->len + len - 1,
17726 sizeof(struct bpf_insn_aux_data)));
17731 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
17732 if (IS_ERR(new_prog)) {
17733 if (PTR_ERR(new_prog) == -ERANGE)
17735 "insn %d cannot be patched due to 16-bit range\n",
17736 env->insn_aux_data[off].orig_idx);
17740 adjust_insn_aux_data(env, new_data, new_prog, off, len);
17741 adjust_subprog_starts(env, off, len);
17742 adjust_poke_descs(new_prog, off, len);
17746 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
17751 /* find first prog starting at or after off (first to remove) */
17752 for (i = 0; i < env->subprog_cnt; i++)
17753 if (env->subprog_info[i].start >= off)
17755 /* find first prog starting at or after off + cnt (first to stay) */
17756 for (j = i; j < env->subprog_cnt; j++)
17757 if (env->subprog_info[j].start >= off + cnt)
17759 /* if j doesn't start exactly at off + cnt, we are just removing
17760 * the front of previous prog
17762 if (env->subprog_info[j].start != off + cnt)
17766 struct bpf_prog_aux *aux = env->prog->aux;
17769 /* move fake 'exit' subprog as well */
17770 move = env->subprog_cnt + 1 - j;
17772 memmove(env->subprog_info + i,
17773 env->subprog_info + j,
17774 sizeof(*env->subprog_info) * move);
17775 env->subprog_cnt -= j - i;
17777 /* remove func_info */
17778 if (aux->func_info) {
17779 move = aux->func_info_cnt - j;
17781 memmove(aux->func_info + i,
17782 aux->func_info + j,
17783 sizeof(*aux->func_info) * move);
17784 aux->func_info_cnt -= j - i;
17785 /* func_info->insn_off is set after all code rewrites,
17786 * in adjust_btf_func() - no need to adjust
17790 /* convert i from "first prog to remove" to "first to adjust" */
17791 if (env->subprog_info[i].start == off)
17795 /* update fake 'exit' subprog as well */
17796 for (; i <= env->subprog_cnt; i++)
17797 env->subprog_info[i].start -= cnt;
17802 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
17805 struct bpf_prog *prog = env->prog;
17806 u32 i, l_off, l_cnt, nr_linfo;
17807 struct bpf_line_info *linfo;
17809 nr_linfo = prog->aux->nr_linfo;
17813 linfo = prog->aux->linfo;
17815 /* find first line info to remove, count lines to be removed */
17816 for (i = 0; i < nr_linfo; i++)
17817 if (linfo[i].insn_off >= off)
17822 for (; i < nr_linfo; i++)
17823 if (linfo[i].insn_off < off + cnt)
17828 /* First live insn doesn't match first live linfo, it needs to "inherit"
17829 * last removed linfo. prog is already modified, so prog->len == off
17830 * means no live instructions after (tail of the program was removed).
17832 if (prog->len != off && l_cnt &&
17833 (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
17835 linfo[--i].insn_off = off + cnt;
17838 /* remove the line info which refer to the removed instructions */
17840 memmove(linfo + l_off, linfo + i,
17841 sizeof(*linfo) * (nr_linfo - i));
17843 prog->aux->nr_linfo -= l_cnt;
17844 nr_linfo = prog->aux->nr_linfo;
17847 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
17848 for (i = l_off; i < nr_linfo; i++)
17849 linfo[i].insn_off -= cnt;
17851 /* fix up all subprogs (incl. 'exit') which start >= off */
17852 for (i = 0; i <= env->subprog_cnt; i++)
17853 if (env->subprog_info[i].linfo_idx > l_off) {
17854 /* program may have started in the removed region but
17855 * may not be fully removed
17857 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
17858 env->subprog_info[i].linfo_idx -= l_cnt;
17860 env->subprog_info[i].linfo_idx = l_off;
17866 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
17868 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
17869 unsigned int orig_prog_len = env->prog->len;
17872 if (bpf_prog_is_offloaded(env->prog->aux))
17873 bpf_prog_offload_remove_insns(env, off, cnt);
17875 err = bpf_remove_insns(env->prog, off, cnt);
17879 err = adjust_subprog_starts_after_remove(env, off, cnt);
17883 err = bpf_adj_linfo_after_remove(env, off, cnt);
17887 memmove(aux_data + off, aux_data + off + cnt,
17888 sizeof(*aux_data) * (orig_prog_len - off - cnt));
17893 /* The verifier does more data flow analysis than llvm and will not
17894 * explore branches that are dead at run time. Malicious programs can
17895 * have dead code too. Therefore replace all dead at-run-time code
17898 * Just nops are not optimal, e.g. if they would sit at the end of the
17899 * program and through another bug we would manage to jump there, then
17900 * we'd execute beyond program memory. Returning exception
17901 * code also wouldn't work since we can have subprogs where the dead
17902 * code could be located.
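/*
 * Editor's note (illustrative): BPF_JMP_IMM(BPF_JA, 0, 0, -1) jumps to
 * itself (target = pc + 1 + (-1) = pc), so if a bug ever made one of these
 * "unreachable" insns execute, the program would spin in place instead of
 * running off the end of the image.
 */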
17904 static void sanitize_dead_code(struct bpf_verifier_env *env)
17906 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
17907 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
17908 struct bpf_insn *insn = env->prog->insnsi;
17909 const int insn_cnt = env->prog->len;
17912 for (i = 0; i < insn_cnt; i++) {
17913 if (aux_data[i].seen)
17915 memcpy(insn + i, &trap, sizeof(trap));
17916 aux_data[i].zext_dst = false;
17920 static bool insn_is_cond_jump(u8 code)
17925 if (BPF_CLASS(code) == BPF_JMP32)
17926 return op != BPF_JA;
17928 if (BPF_CLASS(code) != BPF_JMP)
17931 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
17934 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
17936 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
17937 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
17938 struct bpf_insn *insn = env->prog->insnsi;
17939 const int insn_cnt = env->prog->len;
17942 for (i = 0; i < insn_cnt; i++, insn++) {
17943 if (!insn_is_cond_jump(insn->code))
17946 if (!aux_data[i + 1].seen)
17947 ja.off = insn->off;
17948 else if (!aux_data[i + 1 + insn->off].seen)
17953 if (bpf_prog_is_offloaded(env->prog->aux))
17954 bpf_prog_offload_replace_insn(env, i, &ja);
17956 memcpy(insn, &ja, sizeof(ja));
17960 static int opt_remove_dead_code(struct bpf_verifier_env *env)
17962 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
17963 int insn_cnt = env->prog->len;
17966 for (i = 0; i < insn_cnt; i++) {
17970 while (i + j < insn_cnt && !aux_data[i + j].seen)
17975 err = verifier_remove_insns(env, i, j);
17978 insn_cnt = env->prog->len;
17984 static int opt_remove_nops(struct bpf_verifier_env *env)
17986 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
17987 struct bpf_insn *insn = env->prog->insnsi;
17988 int insn_cnt = env->prog->len;
17991 for (i = 0; i < insn_cnt; i++) {
17992 if (memcmp(&insn[i], &ja, sizeof(ja)))
17995 err = verifier_remove_insns(env, i, 1);
18005 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
18006 const union bpf_attr *attr)
18008 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
18009 struct bpf_insn_aux_data *aux = env->insn_aux_data;
18010 int i, patch_len, delta = 0, len = env->prog->len;
18011 struct bpf_insn *insns = env->prog->insnsi;
18012 struct bpf_prog *new_prog;
18015 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
18016 zext_patch[1] = BPF_ZEXT_REG(0);
18017 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
18018 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
18019 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
18020 for (i = 0; i < len; i++) {
18021 int adj_idx = i + delta;
18022 struct bpf_insn insn;
18025 insn = insns[adj_idx];
18026 load_reg = insn_def_regno(&insn);
18027 if (!aux[adj_idx].zext_dst) {
18035 class = BPF_CLASS(code);
18036 if (load_reg == -1)
18039 /* NOTE: arg "reg" (the fourth one) is only used for
18040 * BPF_STX + SRC_OP, so it is safe to pass NULL
18043 if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
18044 if (class == BPF_LD &&
18045 BPF_MODE(code) == BPF_IMM)
18050 /* ctx load could be transformed into wider load. */
18051 if (class == BPF_LDX &&
18052 aux[adj_idx].ptr_type == PTR_TO_CTX)
18055 imm_rnd = get_random_u32();
18056 rnd_hi32_patch[0] = insn;
18057 rnd_hi32_patch[1].imm = imm_rnd;
18058 rnd_hi32_patch[3].dst_reg = load_reg;
18059 patch = rnd_hi32_patch;
18061 goto apply_patch_buffer;
18064 /* Add in a zero-extend instruction if a) the JIT has requested
18065 * it or b) it's a CMPXCHG.
18067 * The latter is because: BPF_CMPXCHG always loads a value into
18068 * R0, therefore always zero-extends. However some archs'
18069 * equivalent instruction only does this load when the
18070 * comparison is successful. This detail of CMPXCHG is
18071 * orthogonal to the general zero-extension behaviour of the
18072 * CPU, so it's treated independently of bpf_jit_needs_zext.
18074 if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
18077 /* Zero-extension is done by the caller. */
18078 if (bpf_pseudo_kfunc_call(&insn))
18081 if (WARN_ON(load_reg == -1)) {
18082 verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
18086 zext_patch[0] = insn;
18087 zext_patch[1].dst_reg = load_reg;
18088 zext_patch[1].src_reg = load_reg;
18089 patch = zext_patch;
18091 apply_patch_buffer:
18092 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
18095 env->prog = new_prog;
18096 insns = new_prog->insnsi;
18097 aux = env->insn_aux_data;
18098 delta += patch_len - 1;
18104 /* convert load instructions that access fields of a context type into a
18105 * sequence of instructions that access fields of the underlying structure:
18106 * struct __sk_buff -> struct sk_buff
18107 * struct bpf_sock_ops -> struct sock
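/*
 * Editor's illustrative sketch (conceptual; the exact rewrite is done by the
 * prog type's convert_ctx_access() callback): a context load such as
 *
 *   r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len));
 *
 * is replaced with a load from the real object backing the context,
 * conceptually
 *
 *   r0 = *(u32 *)(r1 + offsetof(struct sk_buff, len));
 */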
18109 static int convert_ctx_accesses(struct bpf_verifier_env *env)
18111 const struct bpf_verifier_ops *ops = env->ops;
18112 int i, cnt, size, ctx_field_size, delta = 0;
18113 const int insn_cnt = env->prog->len;
18114 struct bpf_insn insn_buf[16], *insn;
18115 u32 target_size, size_default, off;
18116 struct bpf_prog *new_prog;
18117 enum bpf_access_type type;
18118 bool is_narrower_load;
18120 if (ops->gen_prologue || env->seen_direct_write) {
18121 if (!ops->gen_prologue) {
18122 verbose(env, "bpf verifier is misconfigured\n");
18125 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
18127 if (cnt >= ARRAY_SIZE(insn_buf)) {
18128 verbose(env, "bpf verifier is misconfigured\n");
18131 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
18135 env->prog = new_prog;
18140 if (bpf_prog_is_offloaded(env->prog->aux))
18143 insn = env->prog->insnsi + delta;
18145 for (i = 0; i < insn_cnt; i++, insn++) {
18146 bpf_convert_ctx_access_t convert_ctx_access;
18149 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
18150 insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
18151 insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
18152 insn->code == (BPF_LDX | BPF_MEM | BPF_DW) ||
18153 insn->code == (BPF_LDX | BPF_MEMSX | BPF_B) ||
18154 insn->code == (BPF_LDX | BPF_MEMSX | BPF_H) ||
18155 insn->code == (BPF_LDX | BPF_MEMSX | BPF_W)) {
18157 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
18158 insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
18159 insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
18160 insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
18161 insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
18162 insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
18163 insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
18164 insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
18170 if (type == BPF_WRITE &&
18171 env->insn_aux_data[i + delta].sanitize_stack_spill) {
18172 struct bpf_insn patch[] = {
18177 cnt = ARRAY_SIZE(patch);
18178 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
18183 env->prog = new_prog;
18184 insn = new_prog->insnsi + i + delta;
18188 switch ((int)env->insn_aux_data[i + delta].ptr_type) {
18190 if (!ops->convert_ctx_access)
18192 convert_ctx_access = ops->convert_ctx_access;
18194 case PTR_TO_SOCKET:
18195 case PTR_TO_SOCK_COMMON:
18196 convert_ctx_access = bpf_sock_convert_ctx_access;
18198 case PTR_TO_TCP_SOCK:
18199 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
18201 case PTR_TO_XDP_SOCK:
18202 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
18204 case PTR_TO_BTF_ID:
18205 case PTR_TO_BTF_ID | PTR_UNTRUSTED:
18206 /* PTR_TO_BTF_ID | MEM_ALLOC always has a valid lifetime, unlike
18207 * PTR_TO_BTF_ID, and an active ref_obj_id, but the same cannot
18208 * be said once it is marked PTR_UNTRUSTED, hence we must handle
18209 * any faults for loads into such types. BPF_WRITE is disallowed
18212 case PTR_TO_BTF_ID | MEM_ALLOC | PTR_UNTRUSTED:
18213 if (type == BPF_READ) {
18214 if (BPF_MODE(insn->code) == BPF_MEM)
18215 insn->code = BPF_LDX | BPF_PROBE_MEM |
18216 BPF_SIZE((insn)->code);
18218 insn->code = BPF_LDX | BPF_PROBE_MEMSX |
18219 BPF_SIZE((insn)->code);
18220 env->prog->aux->num_exentries++;
18227 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
18228 size = BPF_LDST_BYTES(insn);
18229 mode = BPF_MODE(insn->code);
18231 /* If the read access is a narrower load of the field,
18232 * convert it to a 4/8-byte load, to minimize program type specific
18233 * convert_ctx_access changes. If the conversion is successful,
18234 * we will apply the proper mask to the result.
18236 is_narrower_load = size < ctx_field_size;
18237 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
18239 if (is_narrower_load) {
18242 if (type == BPF_WRITE) {
18243 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
18248 if (ctx_field_size == 4)
18250 else if (ctx_field_size == 8)
18251 size_code = BPF_DW;
18253 insn->off = off & ~(size_default - 1);
18254 insn->code = BPF_LDX | BPF_MEM | size_code;
18258 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
18260 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
18261 (ctx_field_size && !target_size)) {
18262 verbose(env, "bpf verifier is misconfigured\n");
18266 if (is_narrower_load && size < target_size) {
18267 u8 shift = bpf_ctx_narrow_access_offset(
18268 off, size, size_default) * 8;
18269 if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
18270 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
18273 if (ctx_field_size <= 4) {
18275 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
18278 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
18279 (1 << size * 8) - 1);
18282 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
18285 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
18286 (1ULL << size * 8) - 1);
18289 if (mode == BPF_MEMSX)
18290 insn_buf[cnt++] = BPF_RAW_INSN(BPF_ALU64 | BPF_MOV | BPF_X,
18291 insn->dst_reg, insn->dst_reg,
18294 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18300 /* keep walking new program and skip insns we just inserted */
18301 env->prog = new_prog;
18302 insn = new_prog->insnsi + i + delta;
18308 static int jit_subprogs(struct bpf_verifier_env *env)
18310 struct bpf_prog *prog = env->prog, **func, *tmp;
18311 int i, j, subprog_start, subprog_end = 0, len, subprog;
18312 struct bpf_map *map_ptr;
18313 struct bpf_insn *insn;
18314 void *old_bpf_func;
18315 int err, num_exentries;
18317 if (env->subprog_cnt <= 1)
18320 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
18321 if (!bpf_pseudo_func(insn) && !bpf_pseudo_call(insn))
18324 /* Upon error here we cannot fall back to interpreter but
18325 * need a hard reject of the program. Thus -EFAULT is
18326 * propagated in any case.
18328 subprog = find_subprog(env, i + insn->imm + 1);
18330 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
18331 i + insn->imm + 1);
18334 /* temporarily remember subprog id inside insn instead of
18335 * aux_data, since next loop will split up all insns into funcs
18337 insn->off = subprog;
18338 /* remember original imm in case JIT fails and fallback
18339 * to interpreter will be needed
18341 env->insn_aux_data[i].call_imm = insn->imm;
18342 /* point imm to __bpf_call_base+1 from JITs point of view */
18344 if (bpf_pseudo_func(insn))
18345 /* jit (e.g. x86_64) may emit fewer instructions
18346 * if it learns a u32 imm is the same as a u64 imm.
18347 * Force a non zero here.
18352 err = bpf_prog_alloc_jited_linfo(prog);
18354 goto out_undo_insn;
18357 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
18359 goto out_undo_insn;
18361 for (i = 0; i < env->subprog_cnt; i++) {
18362 subprog_start = subprog_end;
18363 subprog_end = env->subprog_info[i + 1].start;
18365 len = subprog_end - subprog_start;
18366 /* bpf_prog_run() doesn't call subprogs directly,
18367 * hence main prog stats include the runtime of subprogs.
18368 * subprogs don't have IDs and are not reachable via prog_get_next_id
18369 * func[i]->stats will never be accessed and stays NULL
18371 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
18374 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
18375 len * sizeof(struct bpf_insn));
18376 func[i]->type = prog->type;
18377 func[i]->len = len;
18378 if (bpf_prog_calc_tag(func[i]))
18380 func[i]->is_func = 1;
18381 func[i]->aux->func_idx = i;
18382 /* Below members will be freed only at prog->aux */
18383 func[i]->aux->btf = prog->aux->btf;
18384 func[i]->aux->func_info = prog->aux->func_info;
18385 func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
18386 func[i]->aux->poke_tab = prog->aux->poke_tab;
18387 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
18389 for (j = 0; j < prog->aux->size_poke_tab; j++) {
18390 struct bpf_jit_poke_descriptor *poke;
18392 poke = &prog->aux->poke_tab[j];
18393 if (poke->insn_idx < subprog_end &&
18394 poke->insn_idx >= subprog_start)
18395 poke->aux = func[i]->aux;
18398 func[i]->aux->name[0] = 'F';
18399 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
18400 func[i]->jit_requested = 1;
18401 func[i]->blinding_requested = prog->blinding_requested;
18402 func[i]->aux->kfunc_tab = prog->aux->kfunc_tab;
18403 func[i]->aux->kfunc_btf_tab = prog->aux->kfunc_btf_tab;
18404 func[i]->aux->linfo = prog->aux->linfo;
18405 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
18406 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
18407 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
18409 insn = func[i]->insnsi;
18410 for (j = 0; j < func[i]->len; j++, insn++) {
18411 if (BPF_CLASS(insn->code) == BPF_LDX &&
18412 (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
18413 BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
18416 func[i]->aux->num_exentries = num_exentries;
18417 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
18418 func[i] = bpf_int_jit_compile(func[i]);
18419 if (!func[i]->jited) {
18426 /* at this point all bpf functions were successfully JITed
18427 * now populate all bpf_calls with correct addresses and
18428 * run last pass of JIT
18430 for (i = 0; i < env->subprog_cnt; i++) {
18431 insn = func[i]->insnsi;
18432 for (j = 0; j < func[i]->len; j++, insn++) {
18433 if (bpf_pseudo_func(insn)) {
18434 subprog = insn->off;
18435 insn[0].imm = (u32)(long)func[subprog]->bpf_func;
18436 insn[1].imm = ((u64)(long)func[subprog]->bpf_func) >> 32;
18439 if (!bpf_pseudo_call(insn))
18441 subprog = insn->off;
18442 insn->imm = BPF_CALL_IMM(func[subprog]->bpf_func);
18445 /* we use the aux data to keep a list of the start addresses
18446 * of the JITed images for each function in the program
18448 * for some architectures, such as powerpc64, the imm field
18449 * might not be large enough to hold the offset of the start
18450 * address of the callee's JITed image from __bpf_call_base
18452 * in such cases, we can look up the start address of a callee
18453 * by using its subprog id, available from the off field of
18454 * the call instruction, as an index for this list
18456 func[i]->aux->func = func;
18457 func[i]->aux->func_cnt = env->subprog_cnt;
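/* E.g. a JIT whose 32-bit imm cannot reach the callee can resolve the
 * target as prog->aux->func[insn->off]->bpf_func, using the subprog id
 * stashed in the off field of the call instruction as the index.
 */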
18459 for (i = 0; i < env->subprog_cnt; i++) {
18460 old_bpf_func = func[i]->bpf_func;
18461 tmp = bpf_int_jit_compile(func[i]);
18462 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
18463 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
18470 /* finally lock prog and jit images for all functions and
18471 * populate kallsyms. Begin at the first subprogram, since
18472 * bpf_prog_load will add the kallsyms for the main program.
18474 for (i = 1; i < env->subprog_cnt; i++) {
18475 bpf_prog_lock_ro(func[i]);
18476 bpf_prog_kallsyms_add(func[i]);
18479 /* Last step: make now unused interpreter insns from main
18480 * prog consistent for later dump requests, so they can
18481 * later look the same as if they were interpreted only.
18483 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
18484 if (bpf_pseudo_func(insn)) {
18485 insn[0].imm = env->insn_aux_data[i].call_imm;
18486 insn[1].imm = insn->off;
18490 if (!bpf_pseudo_call(insn))
18492 insn->off = env->insn_aux_data[i].call_imm;
18493 subprog = find_subprog(env, i + insn->off + 1);
18494 insn->imm = subprog;
18498 prog->bpf_func = func[0]->bpf_func;
18499 prog->jited_len = func[0]->jited_len;
18500 prog->aux->extable = func[0]->aux->extable;
18501 prog->aux->num_exentries = func[0]->aux->num_exentries;
18502 prog->aux->func = func;
18503 prog->aux->func_cnt = env->subprog_cnt;
18504 bpf_prog_jit_attempt_done(prog);
18507 /* We failed JIT'ing, so at this point we need to unregister poke
18508 * descriptors from subprogs, so that kernel is not attempting to
18509 * patch it anymore as we're freeing the subprog JIT memory.
18511 for (i = 0; i < prog->aux->size_poke_tab; i++) {
18512 map_ptr = prog->aux->poke_tab[i].tail_call.map;
18513 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
18515 /* At this point we're guaranteed that poke descriptors are not
18516 * live anymore. We can just unlink its descriptor table as it's
18517 * released with the main prog.
18519 for (i = 0; i < env->subprog_cnt; i++) {
18522 func[i]->aux->poke_tab = NULL;
18523 bpf_jit_free(func[i]);
18527 /* cleanup main prog to be interpreted */
18528 prog->jit_requested = 0;
18529 prog->blinding_requested = 0;
18530 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
18531 if (!bpf_pseudo_call(insn))
18534 insn->imm = env->insn_aux_data[i].call_imm;
18536 bpf_prog_jit_attempt_done(prog);
18540 static int fixup_call_args(struct bpf_verifier_env *env)
18542 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
18543 struct bpf_prog *prog = env->prog;
18544 struct bpf_insn *insn = prog->insnsi;
18545 bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
18550 if (env->prog->jit_requested &&
18551 !bpf_prog_is_offloaded(env->prog->aux)) {
18552 err = jit_subprogs(env);
18555 if (err == -EFAULT)
18558 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
18559 if (has_kfunc_call) {
18560 verbose(env, "calling kernel functions is not allowed in non-JITed programs\n");
18563 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
18564 /* When the JIT fails, progs with bpf2bpf calls and tail_calls
18565 * have to be rejected, since the interpreter doesn't support them yet.
18567 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
18570 for (i = 0; i < prog->len; i++, insn++) {
18571 if (bpf_pseudo_func(insn)) {
18572 /* When the JIT fails, progs with callback calls
18573 * have to be rejected, since the interpreter doesn't support them yet.
18575 verbose(env, "callbacks are not allowed in non-JITed programs\n");
18579 if (!bpf_pseudo_call(insn))
18581 depth = get_callee_stack_depth(env, insn, i);
18584 bpf_patch_call_args(insn, depth);
18591 /* replace a generic kfunc with a specialized version if necessary */
18592 static void specialize_kfunc(struct bpf_verifier_env *env,
18593 u32 func_id, u16 offset, unsigned long *addr)
18595 struct bpf_prog *prog = env->prog;
18596 bool seen_direct_write;
18600 if (bpf_dev_bound_kfunc_id(func_id)) {
18601 xdp_kfunc = bpf_dev_bound_resolve_kfunc(prog, func_id);
18603 *addr = (unsigned long)xdp_kfunc;
18606 /* fallback to default kfunc when not supported by netdev */
18612 if (func_id == special_kfunc_list[KF_bpf_dynptr_from_skb]) {
18613 seen_direct_write = env->seen_direct_write;
18614 is_rdonly = !may_access_direct_pkt_data(env, NULL, BPF_WRITE);
18617 *addr = (unsigned long)bpf_dynptr_from_skb_rdonly;
18619 /* restore env->seen_direct_write to its original value, since
18620 * may_access_direct_pkt_data mutates it
18622 env->seen_direct_write = seen_direct_write;
18626 static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
18627 u16 struct_meta_reg,
18628 u16 node_offset_reg,
18629 struct bpf_insn *insn,
18630 struct bpf_insn *insn_buf,
18633 struct btf_struct_meta *kptr_struct_meta = insn_aux->kptr_struct_meta;
18634 struct bpf_insn addr[2] = { BPF_LD_IMM64(struct_meta_reg, (long)kptr_struct_meta) };
18636 insn_buf[0] = addr[0];
18637 insn_buf[1] = addr[1];
18638 insn_buf[2] = BPF_MOV64_IMM(node_offset_reg, insn_aux->insert_off);
18639 insn_buf[3] = *insn;
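/* The rewritten sequence passes two hidden arguments to the *_impl kfunc:
 * struct_meta_reg receives the btf_struct_meta pointer and node_offset_reg
 * the node field offset, followed by the original call instruction.
 */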
18643 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
18644 struct bpf_insn *insn_buf, int insn_idx, int *cnt)
18646 const struct bpf_kfunc_desc *desc;
18649 verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
18655 /* insn->imm has the btf func_id. Replace it with an offset relative to
18656 * __bpf_call_base, unless the JIT needs to call functions that are
18657 * further than 32 bits away (bpf_jit_supports_far_kfunc_call()).
18659 desc = find_kfunc_desc(env->prog, insn->imm, insn->off);
18661 verbose(env, "verifier internal error: kernel function descriptor not found for func_id %u\n",
18666 if (!bpf_jit_supports_far_kfunc_call())
18667 insn->imm = BPF_CALL_IMM(desc->addr);
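/* BPF_CALL_IMM(addr) stores addr - __bpf_call_base, so the interpreter and
 * JITs can recover the kfunc address as __bpf_call_base + insn->imm; kfuncs
 * further away than 32 bits need bpf_jit_supports_far_kfunc_call() instead.
 */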
18670 if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
18671 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
18672 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
18673 u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
18675 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
18676 insn_buf[1] = addr[0];
18677 insn_buf[2] = addr[1];
18678 insn_buf[3] = *insn;
18680 } else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
18681 desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
18682 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
18683 struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
18685 if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
18686 !kptr_struct_meta) {
18687 verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
18692 insn_buf[0] = addr[0];
18693 insn_buf[1] = addr[1];
18694 insn_buf[2] = *insn;
18696 } else if (desc->func_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
18697 desc->func_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
18698 desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
18699 struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
18700 int struct_meta_reg = BPF_REG_3;
18701 int node_offset_reg = BPF_REG_4;
18703 /* rbtree_add has extra 'less' arg, so args-to-fixup are in diff regs */
18704 if (desc->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
18705 struct_meta_reg = BPF_REG_4;
18706 node_offset_reg = BPF_REG_5;
18709 if (!kptr_struct_meta) {
18710 verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
18715 __fixup_collection_insert_kfunc(&env->insn_aux_data[insn_idx], struct_meta_reg,
18716 node_offset_reg, insn, insn_buf, cnt);
18717 } else if (desc->func_id == special_kfunc_list[KF_bpf_cast_to_kern_ctx] ||
18718 desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
18719 insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
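/* bpf_cast_to_kern_ctx() and bpf_rdonly_cast() only change the verifier's
 * view of the pointer, so the call collapses to a plain r0 = r1 move and
 * has no run-time cost.
 */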
18725 /* Do various post-verification rewrites in a single program pass.
18726 * These rewrites simplify JIT and interpreter implementations.
18728 static int do_misc_fixups(struct bpf_verifier_env *env)
18730 struct bpf_prog *prog = env->prog;
18731 enum bpf_attach_type eatype = prog->expected_attach_type;
18732 enum bpf_prog_type prog_type = resolve_prog_type(prog);
18733 struct bpf_insn *insn = prog->insnsi;
18734 const struct bpf_func_proto *fn;
18735 const int insn_cnt = prog->len;
18736 const struct bpf_map_ops *ops;
18737 struct bpf_insn_aux_data *aux;
18738 struct bpf_insn insn_buf[16];
18739 struct bpf_prog *new_prog;
18740 struct bpf_map *map_ptr;
18741 int i, ret, cnt, delta = 0;
18743 for (i = 0; i < insn_cnt; i++, insn++) {
18744 /* Make divide-by-zero exceptions impossible. */
18745 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
18746 insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
18747 insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
18748 insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
18749 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
18750 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
18751 struct bpf_insn *patchlet;
18752 struct bpf_insn chk_and_div[] = {
18753 /* [R,W]x div 0 -> 0 */
18754 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
18755 BPF_JNE | BPF_K, insn->src_reg,
18757 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
18758 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
18761 struct bpf_insn chk_and_mod[] = {
18762 /* [R,W]x mod 0 -> [R,W]x */
18763 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
18764 BPF_JEQ | BPF_K, insn->src_reg,
18765 0, 1 + (is64 ? 0 : 1), 0),
18767 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
18768 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
18771 patchlet = isdiv ? chk_and_div : chk_and_mod;
18772 cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
18773 ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
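/* E.g. "r0 /= r2" is rewritten to:
 *   if r2 != 0 goto +2
 *   w0 ^= w0        (divide by zero yields 0)
 *   goto +1
 *   r0 /= r2
 * and "r0 %= r2" simply skips the operation when r2 == 0, leaving the
 * dividend unchanged as the comment above describes.
 */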
18775 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
18780 env->prog = prog = new_prog;
18781 insn = new_prog->insnsi + i + delta;
18785 /* Implement LD_ABS and LD_IND with a rewrite, if supported by the program type. */
18786 if (BPF_CLASS(insn->code) == BPF_LD &&
18787 (BPF_MODE(insn->code) == BPF_ABS ||
18788 BPF_MODE(insn->code) == BPF_IND)) {
18789 cnt = env->ops->gen_ld_abs(insn, insn_buf);
18790 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
18791 verbose(env, "bpf verifier is misconfigured\n");
18795 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18800 env->prog = prog = new_prog;
18801 insn = new_prog->insnsi + i + delta;
18805 /* Rewrite pointer arithmetic to mitigate speculation attacks. */
18806 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
18807 insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
18808 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
18809 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
18810 struct bpf_insn *patch = &insn_buf[0];
18811 bool issrc, isneg, isimm;
18814 aux = &env->insn_aux_data[i + delta];
18815 if (!aux->alu_state ||
18816 aux->alu_state == BPF_ALU_NON_POINTER)
18819 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
18820 issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
18821 BPF_ALU_SANITIZE_SRC;
18822 isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
18824 off_reg = issrc ? insn->src_reg : insn->dst_reg;
18826 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
18829 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
18830 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
18831 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
18832 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
18833 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
18834 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
18835 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
18838 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
18839 insn->src_reg = BPF_REG_AX;
18841 insn->code = insn->code == code_add ?
18842 code_sub : code_add;
18844 if (issrc && isneg && !isimm)
18845 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
18846 cnt = patch - insn_buf;
18848 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18853 env->prog = prog = new_prog;
18854 insn = new_prog->insnsi + i + delta;
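/* Rough effect of the masking sequence: BPF_REG_AX ends up equal to
 * off_reg when 0 <= off_reg <= alu_limit and equal to 0 otherwise, and the
 * pointer ALU then uses AX as the offset, so even under misspeculation the
 * resulting pointer cannot leave the bounds the verifier proved.
 */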
18858 if (insn->code != (BPF_JMP | BPF_CALL))
18860 if (insn->src_reg == BPF_PSEUDO_CALL)
18862 if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
18863 ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
18869 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18874 env->prog = prog = new_prog;
18875 insn = new_prog->insnsi + i + delta;
18879 if (insn->imm == BPF_FUNC_get_route_realm)
18880 prog->dst_needed = 1;
18881 if (insn->imm == BPF_FUNC_get_prandom_u32)
18882 bpf_user_rnd_init_once();
18883 if (insn->imm == BPF_FUNC_override_return)
18884 prog->kprobe_override = 1;
18885 if (insn->imm == BPF_FUNC_tail_call) {
18886 /* If we tail call into other programs, we
18887 * cannot make any assumptions since they can
18888 * be replaced dynamically during runtime in
18889 * the program array.
18891 prog->cb_access = 1;
18892 if (!allow_tail_call_in_subprogs(env))
18893 prog->aux->stack_depth = MAX_BPF_STACK;
18894 prog->aux->max_pkt_offset = MAX_PACKET_OFF;
18896 /* mark bpf_tail_call as different opcode to avoid
18897 * conditional branch in the interpreter for every normal
18898 * call and to prevent accidental JITing by JIT compiler
18899 * that doesn't support bpf_tail_call yet
18902 insn->code = BPF_JMP | BPF_TAIL_CALL;
18904 aux = &env->insn_aux_data[i + delta];
18905 if (env->bpf_capable && !prog->blinding_requested &&
18906 prog->jit_requested &&
18907 !bpf_map_key_poisoned(aux) &&
18908 !bpf_map_ptr_poisoned(aux) &&
18909 !bpf_map_ptr_unpriv(aux)) {
18910 struct bpf_jit_poke_descriptor desc = {
18911 .reason = BPF_POKE_REASON_TAIL_CALL,
18912 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
18913 .tail_call.key = bpf_map_key_immediate(aux),
18914 .insn_idx = i + delta,
18917 ret = bpf_jit_add_poke_descriptor(prog, &desc);
18919 verbose(env, "adding tail call poke descriptor failed\n");
18923 insn->imm = ret + 1;
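/* With the poke descriptor in place the JIT can emit a direct jump for
 * this tail call and later re-patch the target via map_poke_run() whenever
 * the corresponding prog_array slot changes; insn->imm records the
 * descriptor slot so the JIT can find it.
 */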
18927 if (!bpf_map_ptr_unpriv(aux))
18930 /* instead of changing every JIT dealing with tail_call
18931 * emit two extra insns:
18932 * if (index >= max_entries) goto out;
18933 * index &= array->index_mask;
18934 * to avoid out-of-bounds cpu speculation
18936 if (bpf_map_ptr_poisoned(aux)) {
18937 verbose(env, "tail_call abusing map_ptr\n");
18941 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
18942 insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
18943 map_ptr->max_entries, 2);
18944 insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
18945 container_of(map_ptr,
18948 insn_buf[2] = *insn;
18950 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18955 env->prog = prog = new_prog;
18956 insn = new_prog->insnsi + i + delta;
18960 if (insn->imm == BPF_FUNC_timer_set_callback) {
18961 /* The verifier will process callback_fn as many times as necessary
18962 * with different maps and the register states prepared by
18963 * set_timer_callback_state will be accurate.
18965 * The following use case is valid:
18966 * map1 is shared by prog1, prog2, prog3.
18967 * prog1 calls bpf_timer_init for some map1 elements
18968 * prog2 calls bpf_timer_set_callback for some map1 elements.
18969 * Those that were not bpf_timer_init-ed will return -EINVAL.
18970 * prog3 calls bpf_timer_start for some map1 elements.
18971 * Those that were not both bpf_timer_init-ed and
18972 * bpf_timer_set_callback-ed will return -EINVAL.
18974 struct bpf_insn ld_addrs[2] = {
18975 BPF_LD_IMM64(BPF_REG_3, (long)prog->aux),
18978 insn_buf[0] = ld_addrs[0];
18979 insn_buf[1] = ld_addrs[1];
18980 insn_buf[2] = *insn;
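/* i.e. the call gains a hidden third argument, R3 = prog->aux, so the
 * helper can associate the timer callback with the program that armed it
 * (e.g. to keep the program alive while the callback is pending).
 */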
18983 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
18988 env->prog = prog = new_prog;
18989 insn = new_prog->insnsi + i + delta;
18990 goto patch_call_imm;
18993 if (is_storage_get_function(insn->imm)) {
18994 if (!env->prog->aux->sleepable ||
18995 env->insn_aux_data[i + delta].storage_get_func_atomic)
18996 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_ATOMIC);
18998 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_5, (__force __s32)GFP_KERNEL);
18999 insn_buf[1] = *insn;
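/* i.e. the storage_get helpers receive a hidden fifth argument carrying
 * the allocation flags: GFP_KERNEL only for sleepable programs whose call
 * site was not marked atomic, GFP_ATOMIC otherwise.
 */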
19002 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19007 env->prog = prog = new_prog;
19008 insn = new_prog->insnsi + i + delta;
19009 goto patch_call_imm;
19012 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
19013 * and other inlining handlers are currently limited to 64 bit
19016 if (prog->jit_requested && BITS_PER_LONG == 64 &&
19017 (insn->imm == BPF_FUNC_map_lookup_elem ||
19018 insn->imm == BPF_FUNC_map_update_elem ||
19019 insn->imm == BPF_FUNC_map_delete_elem ||
19020 insn->imm == BPF_FUNC_map_push_elem ||
19021 insn->imm == BPF_FUNC_map_pop_elem ||
19022 insn->imm == BPF_FUNC_map_peek_elem ||
19023 insn->imm == BPF_FUNC_redirect_map ||
19024 insn->imm == BPF_FUNC_for_each_map_elem ||
19025 insn->imm == BPF_FUNC_map_lookup_percpu_elem)) {
19026 aux = &env->insn_aux_data[i + delta];
19027 if (bpf_map_ptr_poisoned(aux))
19028 goto patch_call_imm;
19030 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
19031 ops = map_ptr->ops;
19032 if (insn->imm == BPF_FUNC_map_lookup_elem &&
19033 ops->map_gen_lookup) {
19034 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
19035 if (cnt == -EOPNOTSUPP)
19036 goto patch_map_ops_generic;
19037 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
19038 verbose(env, "bpf verifier is misconfigured\n");
19042 new_prog = bpf_patch_insn_data(env, i + delta,
19048 env->prog = prog = new_prog;
19049 insn = new_prog->insnsi + i + delta;
19053 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
19054 (void *(*)(struct bpf_map *map, void *key))NULL));
19055 BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
19056 (long (*)(struct bpf_map *map, void *key))NULL));
19057 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
19058 (long (*)(struct bpf_map *map, void *key, void *value,
19060 BUILD_BUG_ON(!__same_type(ops->map_push_elem,
19061 (long (*)(struct bpf_map *map, void *value,
19063 BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
19064 (long (*)(struct bpf_map *map, void *value))NULL));
19065 BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
19066 (long (*)(struct bpf_map *map, void *value))NULL));
19067 BUILD_BUG_ON(!__same_type(ops->map_redirect,
19068 (long (*)(struct bpf_map *map, u64 index, u64 flags))NULL));
19069 BUILD_BUG_ON(!__same_type(ops->map_for_each_callback,
19070 (long (*)(struct bpf_map *map,
19071 bpf_callback_t callback_fn,
19072 void *callback_ctx,
19074 BUILD_BUG_ON(!__same_type(ops->map_lookup_percpu_elem,
19075 (void *(*)(struct bpf_map *map, void *key, u32 cpu))NULL));
19077 patch_map_ops_generic:
19078 switch (insn->imm) {
19079 case BPF_FUNC_map_lookup_elem:
19080 insn->imm = BPF_CALL_IMM(ops->map_lookup_elem);
19082 case BPF_FUNC_map_update_elem:
19083 insn->imm = BPF_CALL_IMM(ops->map_update_elem);
19085 case BPF_FUNC_map_delete_elem:
19086 insn->imm = BPF_CALL_IMM(ops->map_delete_elem);
19088 case BPF_FUNC_map_push_elem:
19089 insn->imm = BPF_CALL_IMM(ops->map_push_elem);
19091 case BPF_FUNC_map_pop_elem:
19092 insn->imm = BPF_CALL_IMM(ops->map_pop_elem);
19094 case BPF_FUNC_map_peek_elem:
19095 insn->imm = BPF_CALL_IMM(ops->map_peek_elem);
19097 case BPF_FUNC_redirect_map:
19098 insn->imm = BPF_CALL_IMM(ops->map_redirect);
19100 case BPF_FUNC_for_each_map_elem:
19101 insn->imm = BPF_CALL_IMM(ops->map_for_each_callback);
19103 case BPF_FUNC_map_lookup_percpu_elem:
19104 insn->imm = BPF_CALL_IMM(ops->map_lookup_percpu_elem);
19108 goto patch_call_imm;
19111 /* Implement bpf_jiffies64 inline. */
19112 if (prog->jit_requested && BITS_PER_LONG == 64 &&
19113 insn->imm == BPF_FUNC_jiffies64) {
19114 struct bpf_insn ld_jiffies_addr[2] = {
19115 BPF_LD_IMM64(BPF_REG_0,
19116 (unsigned long)&jiffies),
19119 insn_buf[0] = ld_jiffies_addr[0];
19120 insn_buf[1] = ld_jiffies_addr[1];
19121 insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
19125 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
19131 env->prog = prog = new_prog;
19132 insn = new_prog->insnsi + i + delta;
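/* i.e. the helper call is replaced by a BPF_LD_IMM64 of &jiffies followed
 * by an 8-byte load, so reading the counter becomes a direct memory access
 * instead of a function call.
 */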
19136 /* Implement bpf_get_func_arg inline. */
19137 if (prog_type == BPF_PROG_TYPE_TRACING &&
19138 insn->imm == BPF_FUNC_get_func_arg) {
19139 /* Load nr_args from ctx - 8 */
19140 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19141 insn_buf[1] = BPF_JMP32_REG(BPF_JGE, BPF_REG_2, BPF_REG_0, 6);
19142 insn_buf[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 3);
19143 insn_buf[3] = BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1);
19144 insn_buf[4] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0);
19145 insn_buf[5] = BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
19146 insn_buf[6] = BPF_MOV64_IMM(BPF_REG_0, 0);
19147 insn_buf[7] = BPF_JMP_A(1);
19148 insn_buf[8] = BPF_MOV64_IMM(BPF_REG_0, -EINVAL);
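/* Rough equivalent of the sequence above, with r1 = ctx, r2 = n and
 * r3 = value at the call site:
 *   nr_args = *(u64 *)(ctx - 8);
 *   if (n >= nr_args)
 *           return -EINVAL;
 *   *value = ((u64 *)ctx)[n];
 *   return 0;
 */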
19151 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19156 env->prog = prog = new_prog;
19157 insn = new_prog->insnsi + i + delta;
19161 /* Implement bpf_get_func_ret inline. */
19162 if (prog_type == BPF_PROG_TYPE_TRACING &&
19163 insn->imm == BPF_FUNC_get_func_ret) {
19164 if (eatype == BPF_TRACE_FEXIT ||
19165 eatype == BPF_MODIFY_RETURN) {
19166 /* Load nr_args from ctx - 8 */
19167 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19168 insn_buf[1] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
19169 insn_buf[2] = BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1);
19170 insn_buf[3] = BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0);
19171 insn_buf[4] = BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, 0);
19172 insn_buf[5] = BPF_MOV64_IMM(BPF_REG_0, 0);
19175 insn_buf[0] = BPF_MOV64_IMM(BPF_REG_0, -EOPNOTSUPP);
19179 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
19184 env->prog = prog = new_prog;
19185 insn = new_prog->insnsi + i + delta;
19189 /* Implement get_func_arg_cnt inline. */
19190 if (prog_type == BPF_PROG_TYPE_TRACING &&
19191 insn->imm == BPF_FUNC_get_func_arg_cnt) {
19192 /* Load nr_args from ctx - 8 */
19193 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8);
19195 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
19199 env->prog = prog = new_prog;
19200 insn = new_prog->insnsi + i + delta;
19204 /* Implement bpf_get_func_ip inline. */
19205 if (prog_type == BPF_PROG_TYPE_TRACING &&
19206 insn->imm == BPF_FUNC_get_func_ip) {
19207 /* Load IP address from ctx - 16 */
19208 insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -16);
19210 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, 1);
19214 env->prog = prog = new_prog;
19215 insn = new_prog->insnsi + i + delta;
19220 fn = env->ops->get_func_proto(insn->imm, env->prog);
19221 /* all functions that have a prototype and that the verifier allowed
19222 * programs to call must be real in-kernel functions
19226 "kernel subsystem misconfigured func %s#%d\n",
19227 func_id_name(insn->imm), insn->imm);
19230 insn->imm = fn->func - __bpf_call_base;
19233 /* Since poke tab is now finalized, publish aux to tracker. */
19234 for (i = 0; i < prog->aux->size_poke_tab; i++) {
19235 map_ptr = prog->aux->poke_tab[i].tail_call.map;
19236 if (!map_ptr->ops->map_poke_track ||
19237 !map_ptr->ops->map_poke_untrack ||
19238 !map_ptr->ops->map_poke_run) {
19239 verbose(env, "bpf verifier is misconfigured\n");
19243 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
19245 verbose(env, "tracking tail call prog failed\n");
19250 sort_kfunc_descs_by_imm_off(env->prog);
19255 static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
19258 u32 callback_subprogno,
19261 s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
19262 s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
19263 s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
19264 int reg_loop_max = BPF_REG_6;
19265 int reg_loop_cnt = BPF_REG_7;
19266 int reg_loop_ctx = BPF_REG_8;
19268 struct bpf_prog *new_prog;
19269 u32 callback_start;
19270 u32 call_insn_offset;
19271 s32 callback_offset;
19273 /* This represents an inlined version of bpf_iter.c:bpf_loop,
19274 * so take care to keep the two in sync when modifying either.
19276 struct bpf_insn insn_buf[] = {
19277 /* Return error and jump to the end of the patch if
19278 * expected number of iterations is too big.
19280 BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
19281 BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
19282 BPF_JMP_IMM(BPF_JA, 0, 0, 16),
19283 /* spill R6, R7, R8 to use these as loop vars */
19284 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
19285 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
19286 BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
19287 /* initialize loop vars */
19288 BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
19289 BPF_MOV32_IMM(reg_loop_cnt, 0),
19290 BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
19292 * if reg_loop_cnt >= reg_loop_max skip the loop body
19294 BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
19296 * correct callback offset would be set after patching
19298 BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
19299 BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
19301 /* increment loop counter */
19302 BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
19303 /* jump to loop header if callback returned 0 */
19304 BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
19305 /* return value of bpf_loop,
19306 * set R0 to the number of iterations
19308 BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
19309 /* restore original values of R6, R7, R8 */
19310 BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
19311 BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
19312 BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
19315 *cnt = ARRAY_SIZE(insn_buf);
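/* The generated code behaves roughly like:
 *   if (nr > BPF_MAX_LOOPS)
 *           return -E2BIG;
 *   for (i = 0; i < nr; i++)
 *           if (callback(i, ctx) != 0)
 *                   return i + 1;
 *   return nr;
 * with nr, i and ctx living in R6, R7 and R8 across the callback calls.
 */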
19316 new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
19320 /* callback start is known only after patching */
19321 callback_start = env->subprog_info[callback_subprogno].start;
19322 /* Note: insn_buf[12] is an offset of BPF_CALL_REL instruction */
19323 call_insn_offset = position + 12;
19324 callback_offset = callback_start - call_insn_offset - 1;
19325 new_prog->insnsi[call_insn_offset].imm = callback_offset;
19330 static bool is_bpf_loop_call(struct bpf_insn *insn)
19332 return insn->code == (BPF_JMP | BPF_CALL) &&
19333 insn->src_reg == 0 &&
19334 insn->imm == BPF_FUNC_loop;
19337 /* For all sub-programs in the program (including main) check
19338 * insn_aux_data to see if there are bpf_loop calls that require
19339 * inlining. If such calls are found the calls are replaced with a
19340 * sequence of instructions produced by `inline_bpf_loop` function and
19341 * subprog stack_depth is increased by the size of 3 registers.
19342 * This stack space is used to spill the values of R6, R7 and R8. These
19343 * registers are used to store the loop bound, counter and context
19346 static int optimize_bpf_loop(struct bpf_verifier_env *env)
19348 struct bpf_subprog_info *subprogs = env->subprog_info;
19349 int i, cur_subprog = 0, cnt, delta = 0;
19350 struct bpf_insn *insn = env->prog->insnsi;
19351 int insn_cnt = env->prog->len;
19352 u16 stack_depth = subprogs[cur_subprog].stack_depth;
19353 u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
19354 u16 stack_depth_extra = 0;
19356 for (i = 0; i < insn_cnt; i++, insn++) {
19357 struct bpf_loop_inline_state *inline_state =
19358 &env->insn_aux_data[i + delta].loop_inline_state;
19360 if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
19361 struct bpf_prog *new_prog;
19363 stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
19364 new_prog = inline_bpf_loop(env,
19366 -(stack_depth + stack_depth_extra),
19367 inline_state->callback_subprogno,
19373 env->prog = new_prog;
19374 insn = new_prog->insnsi + i + delta;
19377 if (subprogs[cur_subprog + 1].start == i + delta + 1) {
19378 subprogs[cur_subprog].stack_depth += stack_depth_extra;
19380 stack_depth = subprogs[cur_subprog].stack_depth;
19381 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
19382 stack_depth_extra = 0;
19386 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
19391 static void free_states(struct bpf_verifier_env *env)
19393 struct bpf_verifier_state_list *sl, *sln;
19396 sl = env->free_list;
19399 free_verifier_state(&sl->state, false);
19403 env->free_list = NULL;
19405 if (!env->explored_states)
19408 for (i = 0; i < state_htab_size(env); i++) {
19409 sl = env->explored_states[i];
19413 free_verifier_state(&sl->state, false);
19417 env->explored_states[i] = NULL;
19421 static int do_check_common(struct bpf_verifier_env *env, int subprog)
19423 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
19424 struct bpf_verifier_state *state;
19425 struct bpf_reg_state *regs;
19428 env->prev_linfo = NULL;
19431 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
19434 state->curframe = 0;
19435 state->speculative = false;
19436 state->branches = 1;
19437 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
19438 if (!state->frame[0]) {
19442 env->cur_state = state;
19443 init_func_state(env, state->frame[0],
19444 BPF_MAIN_FUNC /* callsite */,
19447 state->first_insn_idx = env->subprog_info[subprog].start;
19448 state->last_insn_idx = -1;
19450 regs = state->frame[state->curframe]->regs;
19451 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
19452 ret = btf_prepare_func_args(env, subprog, regs);
19455 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
19456 if (regs[i].type == PTR_TO_CTX)
19457 mark_reg_known_zero(env, regs, i);
19458 else if (regs[i].type == SCALAR_VALUE)
19459 mark_reg_unknown(env, regs, i);
19460 else if (base_type(regs[i].type) == PTR_TO_MEM) {
19461 const u32 mem_size = regs[i].mem_size;
19463 mark_reg_known_zero(env, regs, i);
19464 regs[i].mem_size = mem_size;
19465 regs[i].id = ++env->id_gen;
19469 /* 1st arg to a function */
19470 regs[BPF_REG_1].type = PTR_TO_CTX;
19471 mark_reg_known_zero(env, regs, BPF_REG_1);
19472 ret = btf_check_subprog_arg_match(env, subprog, regs);
19473 if (ret == -EFAULT)
19474 /* unlikely verifier bug. abort.
19475 * ret == 0 and ret < 0 are sadly acceptable for
19476 * main() function due to backward compatibility.
19477 * E.g. a socket filter program may be written as:
19478 * int bpf_prog(struct pt_regs *ctx)
19479 * and never dereference that ctx in the program.
19480 * 'struct pt_regs' is a type mismatch for socket
19481 * filter that should be using 'struct __sk_buff'.
19486 ret = do_check(env);
19488 /* check for NULL is necessary, since cur_state can be freed inside
19489 * do_check() under memory pressure.
19491 if (env->cur_state) {
19492 free_verifier_state(env->cur_state, true);
19493 env->cur_state = NULL;
19495 while (!pop_stack(env, NULL, NULL, false));
19496 if (!ret && pop_log)
19497 bpf_vlog_reset(&env->log, 0);
19502 /* Verify all global functions in a BPF program one by one based on their BTF.
19503 * All global functions must pass verification. Otherwise the whole program is rejected.
19514 * foo() will be verified first for R1=any_scalar_value. During verification it
19515 * will be assumed that bar() already verified successfully and call to bar()
19516 * from foo() will be checked for type match only. Later bar() will be verified
19517 * independently to check that it's safe for R1=any_scalar_value.
19519 static int do_check_subprogs(struct bpf_verifier_env *env)
19521 struct bpf_prog_aux *aux = env->prog->aux;
19524 if (!aux->func_info)
19527 for (i = 1; i < env->subprog_cnt; i++) {
19528 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
19530 env->insn_idx = env->subprog_info[i].start;
19531 WARN_ON_ONCE(env->insn_idx == 0);
19532 ret = do_check_common(env, i);
19535 } else if (env->log.level & BPF_LOG_LEVEL) {
19537 "Func#%d is safe for any args that match its prototype\n",
19544 static int do_check_main(struct bpf_verifier_env *env)
19549 ret = do_check_common(env, 0);
19551 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
19556 static void print_verification_stats(struct bpf_verifier_env *env)
19560 if (env->log.level & BPF_LOG_STATS) {
19561 verbose(env, "verification time %lld usec\n",
19562 div_u64(env->verification_time, 1000));
19563 verbose(env, "stack depth ");
19564 for (i = 0; i < env->subprog_cnt; i++) {
19565 u32 depth = env->subprog_info[i].stack_depth;
19567 verbose(env, "%d", depth);
19568 if (i + 1 < env->subprog_cnt)
19571 verbose(env, "\n");
19573 verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
19574 "total_states %d peak_states %d mark_read %d\n",
19575 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
19576 env->max_states_per_insn, env->total_states,
19577 env->peak_states, env->longest_mark_read_walk);
19580 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
19582 const struct btf_type *t, *func_proto;
19583 const struct bpf_struct_ops *st_ops;
19584 const struct btf_member *member;
19585 struct bpf_prog *prog = env->prog;
19586 u32 btf_id, member_idx;
19589 if (!prog->gpl_compatible) {
19590 verbose(env, "struct ops programs must have a GPL compatible license\n");
19594 btf_id = prog->aux->attach_btf_id;
19595 st_ops = bpf_struct_ops_find(btf_id);
19597 verbose(env, "attach_btf_id %u is not a supported struct\n",
19603 member_idx = prog->expected_attach_type;
19604 if (member_idx >= btf_type_vlen(t)) {
19605 verbose(env, "attach to invalid member idx %u of struct %s\n",
19606 member_idx, st_ops->name);
19610 member = &btf_type_member(t)[member_idx];
19611 mname = btf_name_by_offset(btf_vmlinux, member->name_off);
19612 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
19615 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
19616 mname, member_idx, st_ops->name);
19620 if (st_ops->check_member) {
19621 int err = st_ops->check_member(t, member, prog);
19624 verbose(env, "attach to unsupported member %s of struct %s\n",
19625 mname, st_ops->name);
19630 prog->aux->attach_func_proto = func_proto;
19631 prog->aux->attach_func_name = mname;
19632 env->ops = st_ops->verifier_ops;
19636 #define SECURITY_PREFIX "security_"
19638 static int check_attach_modify_return(unsigned long addr, const char *func_name)
19640 if (within_error_injection_list(addr) ||
19641 !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
19647 /* list of non-sleepable functions that are otherwise on
19648 * ALLOW_ERROR_INJECTION list
19650 BTF_SET_START(btf_non_sleepable_error_inject)
19651 /* The three functions below can be called from both sleepable and non-sleepable context.
19652 * Assume non-sleepable from a bpf safety point of view.
19654 BTF_ID(func, __filemap_add_folio)
19655 BTF_ID(func, should_fail_alloc_page)
19656 BTF_ID(func, should_failslab)
19657 BTF_SET_END(btf_non_sleepable_error_inject)
19659 static int check_non_sleepable_error_inject(u32 btf_id)
19661 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
19664 int bpf_check_attach_target(struct bpf_verifier_log *log,
19665 const struct bpf_prog *prog,
19666 const struct bpf_prog *tgt_prog,
19668 struct bpf_attach_target_info *tgt_info)
19670 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
19671 const char prefix[] = "btf_trace_";
19672 int ret = 0, subprog = -1, i;
19673 const struct btf_type *t;
19674 bool conservative = true;
19678 struct module *mod = NULL;
19681 bpf_log(log, "Tracing programs must provide btf_id\n");
19684 btf = tgt_prog ? tgt_prog->aux->btf : prog->aux->attach_btf;
19687 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
19690 t = btf_type_by_id(btf, btf_id);
19692 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
19695 tname = btf_name_by_offset(btf, t->name_off);
19697 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
19701 struct bpf_prog_aux *aux = tgt_prog->aux;
19703 if (bpf_prog_is_dev_bound(prog->aux) &&
19704 !bpf_prog_dev_bound_match(prog, tgt_prog)) {
19705 bpf_log(log, "Target program bound device mismatch");
19709 for (i = 0; i < aux->func_info_cnt; i++)
19710 if (aux->func_info[i].type_id == btf_id) {
19714 if (subprog == -1) {
19715 bpf_log(log, "Subprog %s doesn't exist\n", tname);
19718 conservative = aux->func_info_aux[subprog].unreliable;
19719 if (prog_extension) {
19720 if (conservative) {
19722 "Cannot replace static functions\n");
19725 if (!prog->jit_requested) {
19727 "Extension programs should be JITed\n");
19731 if (!tgt_prog->jited) {
19732 bpf_log(log, "Can attach to only JITed progs\n");
19735 if (tgt_prog->type == prog->type) {
19736 /* Cannot fentry/fexit another fentry/fexit program.
19737 * Cannot attach program extension to another extension.
19738 * It's ok to attach fentry/fexit to extension program.
19740 bpf_log(log, "Cannot recursively attach\n");
19743 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
19745 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
19746 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
19747 /* Program extensions can extend all program types
19748 * except fentry/fexit. The reason is the following.
19749 * The fentry/fexit programs are used for performance
19750 * analysis, stats and can be attached to any program
19751 * type except themselves. When an extension program is
19752 * replacing an XDP function, it is necessary to allow
19753 * performance analysis of all functions: both the original
19754 * XDP program and its program extension. Hence
19755 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
19756 * allowed. If extending fentry/fexit were allowed, it
19757 * would be possible to create a long call chain
19758 * fentry->extension->fentry->extension beyond a
19759 * reasonable stack size. Hence extending fentry is not allowed. */
19762 bpf_log(log, "Cannot extend fentry/fexit\n");
19766 if (prog_extension) {
19767 bpf_log(log, "Cannot replace kernel functions\n");
19772 switch (prog->expected_attach_type) {
19773 case BPF_TRACE_RAW_TP:
19776 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
19779 if (!btf_type_is_typedef(t)) {
19780 bpf_log(log, "attach_btf_id %u is not a typedef\n",
19784 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
19785 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
19789 tname += sizeof(prefix) - 1;
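/* E.g. attaching to the raw tracepoint "sched_switch" would use the
 * vmlinux BTF typedef btf_trace_sched_switch; after stripping the
 * "btf_trace_" prefix, the remaining name and the pointed-to func_proto
 * describe the arguments the program will receive.
 */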
19790 t = btf_type_by_id(btf, t->type);
19791 if (!btf_type_is_ptr(t))
19792 /* should never happen in valid vmlinux build */
19794 t = btf_type_by_id(btf, t->type);
19795 if (!btf_type_is_func_proto(t))
19796 /* should never happen in valid vmlinux build */
19800 case BPF_TRACE_ITER:
19801 if (!btf_type_is_func(t)) {
19802 bpf_log(log, "attach_btf_id %u is not a function\n",
19806 t = btf_type_by_id(btf, t->type);
19807 if (!btf_type_is_func_proto(t))
19809 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19814 if (!prog_extension)
19817 case BPF_MODIFY_RETURN:
19819 case BPF_LSM_CGROUP:
19820 case BPF_TRACE_FENTRY:
19821 case BPF_TRACE_FEXIT:
19822 if (!btf_type_is_func(t)) {
19823 bpf_log(log, "attach_btf_id %u is not a function\n",
19827 if (prog_extension &&
19828 btf_check_type_match(log, prog, btf, t))
19830 t = btf_type_by_id(btf, t->type);
19831 if (!btf_type_is_func_proto(t))
19834 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
19835 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
19836 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
19839 if (tgt_prog && conservative)
19842 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
19848 addr = (long) tgt_prog->bpf_func;
19850 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
19852 if (btf_is_module(btf)) {
19853 mod = btf_try_get_module(btf);
19855 addr = find_kallsyms_symbol_value(mod, tname);
19859 addr = kallsyms_lookup_name(tname);
19864 "The address of function %s cannot be found\n",
19870 if (prog->aux->sleepable) {
19872 switch (prog->type) {
19873 case BPF_PROG_TYPE_TRACING:
19875 /* fentry/fexit/fmod_ret progs can be sleepable if they are
19876 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
19878 if (!check_non_sleepable_error_inject(btf_id) &&
19879 within_error_injection_list(addr))
19881 /* fentry/fexit/fmod_ret progs can also be sleepable if they are
19882 * in the fmodret id set with the KF_SLEEPABLE flag.
19885 u32 *flags = btf_kfunc_is_modify_return(btf, btf_id,
19888 if (flags && (*flags & KF_SLEEPABLE))
19892 case BPF_PROG_TYPE_LSM:
19893 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
19894 * Only some of them are sleepable.
19896 if (bpf_lsm_is_sleepable_hook(btf_id))
19904 bpf_log(log, "%s is not sleepable\n", tname);
19907 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
19910 bpf_log(log, "can't modify return codes of BPF programs\n");
19914 if (btf_kfunc_is_modify_return(btf, btf_id, prog) ||
19915 !check_attach_modify_return(addr, tname))
19919 bpf_log(log, "%s() is not modifiable\n", tname);
19926 tgt_info->tgt_addr = addr;
19927 tgt_info->tgt_name = tname;
19928 tgt_info->tgt_type = t;
19929 tgt_info->tgt_mod = mod;
19933 BTF_SET_START(btf_id_deny)
19936 BTF_ID(func, migrate_disable)
19937 BTF_ID(func, migrate_enable)
19939 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
19940 BTF_ID(func, rcu_read_unlock_strict)
19942 #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_TRACE_PREEMPT_TOGGLE)
19943 BTF_ID(func, preempt_count_add)
19944 BTF_ID(func, preempt_count_sub)
19946 #ifdef CONFIG_PREEMPT_RCU
19947 BTF_ID(func, __rcu_read_lock)
19948 BTF_ID(func, __rcu_read_unlock)
19950 BTF_SET_END(btf_id_deny)
19952 static bool can_be_sleepable(struct bpf_prog *prog)
19954 if (prog->type == BPF_PROG_TYPE_TRACING) {
19955 switch (prog->expected_attach_type) {
19956 case BPF_TRACE_FENTRY:
19957 case BPF_TRACE_FEXIT:
19958 case BPF_MODIFY_RETURN:
19959 case BPF_TRACE_ITER:
19965 return prog->type == BPF_PROG_TYPE_LSM ||
19966 prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
19967 prog->type == BPF_PROG_TYPE_STRUCT_OPS;
19970 static int check_attach_btf_id(struct bpf_verifier_env *env)
19972 struct bpf_prog *prog = env->prog;
19973 struct bpf_prog *tgt_prog = prog->aux->dst_prog;
19974 struct bpf_attach_target_info tgt_info = {};
19975 u32 btf_id = prog->aux->attach_btf_id;
19976 struct bpf_trampoline *tr;
19980 if (prog->type == BPF_PROG_TYPE_SYSCALL) {
19981 if (prog->aux->sleepable)
19982 /* attach_btf_id checked to be zero already */
19984 verbose(env, "Syscall programs can only be sleepable\n");
19988 if (prog->aux->sleepable && !can_be_sleepable(prog)) {
19989 verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
19993 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
19994 return check_struct_ops_btf_id(env);
19996 if (prog->type != BPF_PROG_TYPE_TRACING &&
19997 prog->type != BPF_PROG_TYPE_LSM &&
19998 prog->type != BPF_PROG_TYPE_EXT)
20001 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
20005 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
20006 /* to make freplace equivalent to their targets, they need to
20007 * inherit env->ops and expected_attach_type for the rest of the
20010 env->ops = bpf_verifier_ops[tgt_prog->type];
20011 prog->expected_attach_type = tgt_prog->expected_attach_type;
20014 /* store info about the attachment target that will be used later */
20015 prog->aux->attach_func_proto = tgt_info.tgt_type;
20016 prog->aux->attach_func_name = tgt_info.tgt_name;
20017 prog->aux->mod = tgt_info.tgt_mod;
20020 prog->aux->saved_dst_prog_type = tgt_prog->type;
20021 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
20024 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
20025 prog->aux->attach_btf_trace = true;
20027 } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
20028 if (!bpf_iter_prog_supported(prog))
20033 if (prog->type == BPF_PROG_TYPE_LSM) {
20034 ret = bpf_lsm_verify_prog(&env->log, prog);
20037 } else if (prog->type == BPF_PROG_TYPE_TRACING &&
20038 btf_id_set_contains(&btf_id_deny, btf_id)) {
20042 key = bpf_trampoline_compute_key(tgt_prog, prog->aux->attach_btf, btf_id);
20043 tr = bpf_trampoline_get(key, &tgt_info);
20047 if (tgt_prog && tgt_prog->aux->tail_call_reachable)
20048 tr->flags = BPF_TRAMP_F_TAIL_CALL_CTX;
20050 prog->aux->dst_trampoline = tr;
20054 struct btf *bpf_get_btf_vmlinux(void)
20056 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
20057 mutex_lock(&bpf_verifier_lock);
20059 btf_vmlinux = btf_parse_vmlinux();
20060 mutex_unlock(&bpf_verifier_lock);
20062 return btf_vmlinux;
20065 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
20067 u64 start_time = ktime_get_ns();
20068 struct bpf_verifier_env *env;
20069 int i, len, ret = -EINVAL, err;
20073 /* no program is valid */
20074 if (ARRAY_SIZE(bpf_verifier_ops) == 0)
20077 /* 'struct bpf_verifier_env' can be global, but since it's not small,
20078 * allocate/free it every time bpf_check() is called
20080 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
20086 len = (*prog)->len;
20087 env->insn_aux_data =
20088 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
20090 if (!env->insn_aux_data)
20092 for (i = 0; i < len; i++)
20093 env->insn_aux_data[i].orig_idx = i;
20095 env->ops = bpf_verifier_ops[env->prog->type];
20096 env->fd_array = make_bpfptr(attr->fd_array, uattr.is_kernel);
20097 is_priv = bpf_capable();
20099 bpf_get_btf_vmlinux();
20101 /* grab the mutex to protect a few globals used by the verifier */
20103 mutex_lock(&bpf_verifier_lock);
20105 /* user could have requested verbose verifier output
20106 * and supplied a buffer to store the verification trace
20108 ret = bpf_vlog_init(&env->log, attr->log_level,
20109 (char __user *) (unsigned long) attr->log_buf,
20114 mark_verifier_state_clean(env);
20116 if (IS_ERR(btf_vmlinux)) {
20117 /* Either gcc, pahole, or the kernel is broken. */
20118 verbose(env, "in-kernel BTF is malformed\n");
20119 ret = PTR_ERR(btf_vmlinux);
20120 goto skip_full_check;
20123 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
20124 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
20125 env->strict_alignment = true;
20126 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
20127 env->strict_alignment = false;
20129 env->allow_ptr_leaks = bpf_allow_ptr_leaks();
20130 env->allow_uninit_stack = bpf_allow_uninit_stack();
20131 env->bypass_spec_v1 = bpf_bypass_spec_v1();
20132 env->bypass_spec_v4 = bpf_bypass_spec_v4();
20133 env->bpf_capable = bpf_capable();
20136 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
20138 env->explored_states = kvcalloc(state_htab_size(env),
20139 sizeof(struct bpf_verifier_state_list *),
20142 if (!env->explored_states)
20143 goto skip_full_check;
20145 ret = add_subprog_and_kfunc(env);
20147 goto skip_full_check;
20149 ret = check_subprogs(env);
20151 goto skip_full_check;
20153 ret = check_btf_info(env, attr, uattr);
20155 goto skip_full_check;
20157 ret = check_attach_btf_id(env);
20159 goto skip_full_check;
20161 ret = resolve_pseudo_ldimm64(env);
20163 goto skip_full_check;
20165 if (bpf_prog_is_offloaded(env->prog->aux)) {
20166 ret = bpf_prog_offload_verifier_prep(env->prog);
20168 goto skip_full_check;
20171 ret = check_cfg(env);
20173 goto skip_full_check;
20175 ret = do_check_subprogs(env);
20176 ret = ret ?: do_check_main(env);
20178 if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
20179 ret = bpf_prog_offload_finalize(env);
20182 kvfree(env->explored_states);
20185 ret = check_max_stack_depth(env);
20187 /* instruction rewrites happen after this point */
20189 ret = optimize_bpf_loop(env);
20193 opt_hard_wire_dead_code_branches(env);
20195 ret = opt_remove_dead_code(env);
20197 ret = opt_remove_nops(env);
20200 sanitize_dead_code(env);
20204 /* program is valid, convert *(u32*)(ctx + off) accesses */
20205 ret = convert_ctx_accesses(env);
20208 ret = do_misc_fixups(env);
20210 /* do 32-bit optimization after insn patching has done so those patched
20211 * insns could be handled correctly.
20213 if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
20214 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
20215 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
20220 ret = fixup_call_args(env);
20222 env->verification_time = ktime_get_ns() - start_time;
20223 print_verification_stats(env);
20224 env->prog->aux->verified_insns = env->insn_processed;
20226 /* preserve original error even if log finalization is successful */
20227 err = bpf_vlog_finalize(&env->log, &log_true_size);
20231 if (uattr_size >= offsetofend(union bpf_attr, log_true_size) &&
20232 copy_to_bpfptr_offset(uattr, offsetof(union bpf_attr, log_true_size),
20233 &log_true_size, sizeof(log_true_size))) {
20235 goto err_release_maps;
20239 goto err_release_maps;
20241 if (env->used_map_cnt) {
20242 /* if program passed verifier, update used_maps in bpf_prog_info */
20243 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
20244 sizeof(env->used_maps[0]),
20247 if (!env->prog->aux->used_maps) {
20249 goto err_release_maps;
20252 memcpy(env->prog->aux->used_maps, env->used_maps,
20253 sizeof(env->used_maps[0]) * env->used_map_cnt);
20254 env->prog->aux->used_map_cnt = env->used_map_cnt;
20256 if (env->used_btf_cnt) {
20257 /* if program passed verifier, update used_btfs in bpf_prog_aux */
20258 env->prog->aux->used_btfs = kmalloc_array(env->used_btf_cnt,
20259 sizeof(env->used_btfs[0]),
20261 if (!env->prog->aux->used_btfs) {
20263 goto err_release_maps;
20266 memcpy(env->prog->aux->used_btfs, env->used_btfs,
20267 sizeof(env->used_btfs[0]) * env->used_btf_cnt);
20268 env->prog->aux->used_btf_cnt = env->used_btf_cnt;
20270 if (env->used_map_cnt || env->used_btf_cnt) {
20271 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
20272 * bpf_ld_imm64 instructions
20274 convert_pseudo_ld_imm64(env);
20277 adjust_btf_func(env);
20280 if (!env->prog->aux->used_maps)
20281 /* if we didn't copy map pointers into bpf_prog_info, release
20282 * them now. Otherwise free_used_maps() will release them.
20285 if (!env->prog->aux->used_btfs)
20288 /* extension progs temporarily inherit the attach_type of their targets
20289 for verification purposes, so set it back to zero before returning
20291 if (env->prog->type == BPF_PROG_TYPE_EXT)
20292 env->prog->expected_attach_type = 0;
20297 mutex_unlock(&bpf_verifier_lock);
20298 vfree(env->insn_aux_data);