kernel/bpf/verifier.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
   3  * Copyright (c) 2016 Facebook
   4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
   5  */
   6 #include <uapi/linux/btf.h>
   7 #include <linux/kernel.h>
   8 #include <linux/types.h>
   9 #include <linux/slab.h>
  10 #include <linux/bpf.h>
  11 #include <linux/btf.h>
  12 #include <linux/bpf_verifier.h>
  13 #include <linux/filter.h>
  14 #include <net/netlink.h>
  15 #include <linux/file.h>
  16 #include <linux/vmalloc.h>
  17 #include <linux/stringify.h>
  18 #include <linux/bsearch.h>
  19 #include <linux/sort.h>
  20 #include <linux/perf_event.h>
  21 #include <linux/ctype.h>
  22 #include <linux/error-injection.h>
  23 #include <linux/bpf_lsm.h>
  24 #include <linux/btf_ids.h>
  25
  26 #include "disasm.h"
  27
  28 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
  29 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
  30         [_id] = & _name ## _verifier_ops,
  31 #define BPF_MAP_TYPE(_id, _ops)
  32 #define BPF_LINK_TYPE(_id, _name)
  33 #include <linux/bpf_types.h>
  34 #undef BPF_PROG_TYPE
  35 #undef BPF_MAP_TYPE
  36 #undef BPF_LINK_TYPE
  37 };
  38
  39 /* bpf_check() is a static code analyzer that walks eBPF program
  40  * instruction by instruction and updates register/stack state.
  41  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
  42  *
  43  * The first pass is depth-first-search to check that the program is a DAG.
  44  * It rejects the following programs:
  45  * - larger than BPF_MAXINSNS insns
  46  * - if loop is present (detected via back-edge)
  47  * - unreachable insns exist (shouldn't be a forest. program = one function)
  48  * - out of bounds or malformed jumps
  49  * The second pass is all possible path descent from the 1st insn.
  50  * Since it's analyzing all pathes through the program, the length of the
  51  * analysis is limited to 64k insn, which may be hit even if total number of
  52  * insn is less then 4K, but there are too many branches that change stack/regs.
  53  * Number of 'branches to be analyzed' is limited to 1k
  54  *
  55  * On entry to each instruction, each register has a type, and the instruction
  56  * changes the types of the registers depending on instruction semantics.
  57  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
  58  * copied to R1.
  59  *
  60  * All registers are 64-bit.
  61  * R0 - return register
  62  * R1-R5 argument passing registers
  63  * R6-R9 callee saved registers
  64  * R10 - frame pointer read-only
  65  *
  66  * At the start of BPF program the register R1 contains a pointer to bpf_context
  67  * and has type PTR_TO_CTX.
  68  *
  69  * Verifier tracks arithmetic operations on pointers in case:
  70  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
  71  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
  72  * 1st insn copies R10 (which has FRAME_PTR) type into R1
  73  * and 2nd arithmetic instruction is pattern matched to recognize
  74  * that it wants to construct a pointer to some element within stack.
  75  * So after 2nd insn, the register R1 has type PTR_TO_STACK
  76  * (and -20 constant is saved for further stack bounds checking).
  77  * Meaning that this reg is a pointer to stack plus known immediate constant.
  78  *
  79  * Most of the time the registers have SCALAR_VALUE type, which
  80  * means the register has some value, but it's not a valid pointer.
  81  * (like pointer plus pointer becomes SCALAR_VALUE type)
  82  *
  83  * When verifier sees load or store instructions the type of base register
  84  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
  85  * four pointer types recognized by check_mem_access() function.
  86  *
  87  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
  88  * and the range of [ptr, ptr + map's value_size) is accessible.
  89  *
  90  * registers used to pass values to function calls are checked against
  91  * function argument constraints.
  92  *
  93  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
  94  * It means that the register type passed to this function must be
  95  * PTR_TO_STACK and it will be used inside the function as
  96  * 'pointer to map element key'
  97  *
  98  * For example the argument constraints for bpf_map_lookup_elem():
  99  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 100  *   .arg1_type = ARG_CONST_MAP_PTR,
 101  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 102  *
 103  * ret_type says that this function returns 'pointer to map elem value or null'
 104  * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 105  * 2nd argument should be a pointer to stack, which will be used inside
 106  * the helper function as a pointer to map element key.
 107  *
 108  * On the kernel side the helper function looks like:
 109  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 110  * {
 111  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 112  *    void *key = (void *) (unsigned long) r2;
 113  *    void *value;
 114  *
 115  *    here kernel can access 'key' and 'map' pointers safely, knowing that
 116  *    [key, key + map->key_size) bytes are valid and were initialized on
 117  *    the stack of eBPF program.
 118  * }
 119  *
 120  * Corresponding eBPF program may look like:
 121  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 122  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 123  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 124  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 125  * here verifier looks at prototype of map_lookup_elem() and sees:
 126  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 127  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 128  *
 129  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 130  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 131  * and were initialized prior to this call.
 132  * If it's ok, then verifier allows this BPF_CALL insn and looks at
 133  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 134  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 135  * returns ether pointer to map value or NULL.
 136  *
 137  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 138  * insn, the register holding that pointer in the true branch changes state to
 139  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 140  * branch. See check_cond_jmp_op().
 141  *
 142  * After the call R0 is set to return type of the function and registers R1-R5
 143  * are set to NOT_INIT to indicate that they are no longer readable.
 144  *
 145  * The following reference types represent a potential reference to a kernel
 146  * resource which, after first being allocated, must be checked and freed by
 147  * the BPF program:
 148  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 149  *
 150  * When the verifier sees a helper call return a reference type, it allocates a
 151  * pointer id for the reference and stores it in the current function state.
 152  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 153  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 154  * passes through a NULL-check conditional. For the branch wherein the state is
 155  * changed to CONST_IMM, the verifier releases the reference.
 156  *
 157  * For each helper function that allocates a reference, such as
 158  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 159  * bpf_sk_release(). When a reference type passes into the release function,
 160  * the verifier also releases the reference. If any unchecked or unreleased
 161  * reference remains at the end of the program, the verifier rejects it.
 162  */
 163
 164 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
 165 struct bpf_verifier_stack_elem {
 166         /* verifer state is 'st'
 167          * before processing instruction 'insn_idx'
 168          * and after processing instruction 'prev_insn_idx'
 169          */
 170         struct bpf_verifier_state st;
 171         int insn_idx;
 172         int prev_insn_idx;
 173         struct bpf_verifier_stack_elem *next;
 174         /* length of verifier log at the time this state was pushed on stack */
 175         u32 log_pos;
 176 };
 177
 178 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ    8192
 179 #define BPF_COMPLEXITY_LIMIT_STATES     64
 180
 181 #define BPF_MAP_KEY_POISON      (1ULL << 63)
 182 #define BPF_MAP_KEY_SEEN        (1ULL << 62)
 183
 184 #define BPF_MAP_PTR_UNPRIV      1UL
 185 #define BPF_MAP_PTR_POISON      ((void *)((0xeB9FUL << 1) +     \
 186                                           POISON_POINTER_DELTA))
 187 #define BPF_MAP_PTR(X)          ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
 188
 189 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
 190 {
 191         return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
 192 }
 193
 194 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
 195 {
 196         return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
 197 }
 198
 199 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
 200                               const struct bpf_map *map, bool unpriv)
 201 {
 202         BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
 203         unpriv |= bpf_map_ptr_unpriv(aux);
 204         aux->map_ptr_state = (unsigned long)map |
 205                              (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
 206 }
 207
 208 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
 209 {
 210         return aux->map_key_state & BPF_MAP_KEY_POISON;
 211 }
 212
 213 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
 214 {
 215         return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
 216 }
 217
 218 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
 219 {
 220         return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
 221 }
 222
 223 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
 224 {
 225         bool poisoned = bpf_map_key_poisoned(aux);
 226
 227         aux->map_key_state = state | BPF_MAP_KEY_SEEN |
 228                              (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
 229 }
 230
 231 struct bpf_call_arg_meta {
 232         struct bpf_map *map_ptr;
 233         bool raw_mode;
 234         bool pkt_access;
 235         int regno;
 236         int access_size;
 237         int mem_size;
 238         u64 msize_max_value;
 239         int ref_obj_id;
 240         int func_id;
 241         u32 btf_id;
 242         u32 ret_btf_id;
 243 };
 244
 245 struct btf *btf_vmlinux;
 246
 247 static DEFINE_MUTEX(bpf_verifier_lock);
 248
 249 static const struct bpf_line_info *
 250 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
 251 {
 252         const struct bpf_line_info *linfo;
 253         const struct bpf_prog *prog;
 254         u32 i, nr_linfo;
 255
 256         prog = env->prog;
 257         nr_linfo = prog->aux->nr_linfo;
 258
 259         if (!nr_linfo || insn_off >= prog->len)
 260                 return NULL;
 261
 262         linfo = prog->aux->linfo;
 263         for (i = 1; i < nr_linfo; i++)
 264                 if (insn_off < linfo[i].insn_off)
 265                         break;
 266
 267         return &linfo[i - 1];
 268 }
 269
 270 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 271                        va_list args)
 272 {
 273         unsigned int n;
 274
 275         n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
 276
 277         WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
 278                   "verifier log line truncated - local buffer too short\n");
 279
 280         n = min(log->len_total - log->len_used - 1, n);
 281         log->kbuf[n] = '\0';
 282
 283         if (log->level == BPF_LOG_KERNEL) {
 284                 pr_err("BPF:%s\n", log->kbuf);
 285                 return;
 286         }
 287         if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
 288                 log->len_used += n;
 289         else
 290                 log->ubuf = NULL;
 291 }
 292
 293 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
 294 {
 295         char zero = 0;
 296
 297         if (!bpf_verifier_log_needed(log))
 298                 return;
 299
 300         log->len_used = new_pos;
 301         if (put_user(zero, log->ubuf + new_pos))
 302                 log->ubuf = NULL;
 303 }
 304
 305 /* log_level controls verbosity level of eBPF verifier.
 306  * bpf_verifier_log_write() is used to dump the verification trace to the log,
 307  * so the user can figure out what's wrong with the program
 308  */
 309 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
 310                                            const char *fmt, ...)
 311 {
 312         va_list args;
 313
 314         if (!bpf_verifier_log_needed(&env->log))
 315                 return;
 316
 317         va_start(args, fmt);
 318         bpf_verifier_vlog(&env->log, fmt, args);
 319         va_end(args);
 320 }
 321 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
 322
 323 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
 324 {
 325         struct bpf_verifier_env *env = private_data;
 326         va_list args;
 327
 328         if (!bpf_verifier_log_needed(&env->log))
 329                 return;
 330
 331         va_start(args, fmt);
 332         bpf_verifier_vlog(&env->log, fmt, args);
 333         va_end(args);
 334 }
 335
 336 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
 337                             const char *fmt, ...)
 338 {
 339         va_list args;
 340
 341         if (!bpf_verifier_log_needed(log))
 342                 return;
 343
 344         va_start(args, fmt);
 345         bpf_verifier_vlog(log, fmt, args);
 346         va_end(args);
 347 }
 348
 349 static const char *ltrim(const char *s)
 350 {
 351         while (isspace(*s))
 352                 s++;
 353
 354         return s;
 355 }
 356
 357 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
 358                                          u32 insn_off,
 359                                          const char *prefix_fmt, ...)
 360 {
 361         const struct bpf_line_info *linfo;
 362
 363         if (!bpf_verifier_log_needed(&env->log))
 364                 return;
 365
 366         linfo = find_linfo(env, insn_off);
 367         if (!linfo || linfo == env->prev_linfo)
 368                 return;
 369
 370         if (prefix_fmt) {
 371                 va_list args;
 372
 373                 va_start(args, prefix_fmt);
 374                 bpf_verifier_vlog(&env->log, prefix_fmt, args);
 375                 va_end(args);
 376         }
 377
 378         verbose(env, "%s\n",
 379                 ltrim(btf_name_by_offset(env->prog->aux->btf,
 380                                          linfo->line_off)));
 381
 382         env->prev_linfo = linfo;
 383 }
 384
 385 static bool type_is_pkt_pointer(enum bpf_reg_type type)
 386 {
 387         return type == PTR_TO_PACKET ||
 388                type == PTR_TO_PACKET_META;
 389 }
 390
 391 static bool type_is_sk_pointer(enum bpf_reg_type type)
 392 {
 393         return type == PTR_TO_SOCKET ||
 394                 type == PTR_TO_SOCK_COMMON ||
 395                 type == PTR_TO_TCP_SOCK ||
 396                 type == PTR_TO_XDP_SOCK;
 397 }
 398
 399 static bool reg_type_not_null(enum bpf_reg_type type)
 400 {
 401         return type == PTR_TO_SOCKET ||
 402                 type == PTR_TO_TCP_SOCK ||
 403                 type == PTR_TO_MAP_VALUE ||
 404                 type == PTR_TO_SOCK_COMMON;
 405 }
 406
 407 static bool reg_type_may_be_null(enum bpf_reg_type type)
 408 {
 409         return type == PTR_TO_MAP_VALUE_OR_NULL ||
 410                type == PTR_TO_SOCKET_OR_NULL ||
 411                type == PTR_TO_SOCK_COMMON_OR_NULL ||
 412                type == PTR_TO_TCP_SOCK_OR_NULL ||
 413                type == PTR_TO_BTF_ID_OR_NULL ||
 414                type == PTR_TO_MEM_OR_NULL ||
 415                type == PTR_TO_RDONLY_BUF_OR_NULL ||
 416                type == PTR_TO_RDWR_BUF_OR_NULL;
 417 }
 418
 419 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 420 {
 421         return reg->type == PTR_TO_MAP_VALUE &&
 422                 map_value_has_spin_lock(reg->map_ptr);
 423 }
 424
 425 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 426 {
 427         return type == PTR_TO_SOCKET ||
 428                 type == PTR_TO_SOCKET_OR_NULL ||
 429                 type == PTR_TO_TCP_SOCK ||
 430                 type == PTR_TO_TCP_SOCK_OR_NULL ||
 431                 type == PTR_TO_MEM ||
 432                 type == PTR_TO_MEM_OR_NULL;
 433 }
 434
 435 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
 436 {
 437         return type == ARG_PTR_TO_SOCK_COMMON;
 438 }
 439
 440 static bool arg_type_may_be_null(enum bpf_arg_type type)
 441 {
 442         return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
 443                type == ARG_PTR_TO_MEM_OR_NULL ||
 444                type == ARG_PTR_TO_CTX_OR_NULL ||
 445                type == ARG_PTR_TO_SOCKET_OR_NULL ||
 446                type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
 447 }
 448
 449 /* Determine whether the function releases some resources allocated by another
 450  * function call. The first reference type argument will be assumed to be
 451  * released by release_reference().
 452  */
 453 static bool is_release_function(enum bpf_func_id func_id)
 454 {
 455         return func_id == BPF_FUNC_sk_release ||
 456                func_id == BPF_FUNC_ringbuf_submit ||
 457                func_id == BPF_FUNC_ringbuf_discard;
 458 }
 459
 460 static bool may_be_acquire_function(enum bpf_func_id func_id)
 461 {
 462         return func_id == BPF_FUNC_sk_lookup_tcp ||
 463                 func_id == BPF_FUNC_sk_lookup_udp ||
 464                 func_id == BPF_FUNC_skc_lookup_tcp ||
 465                 func_id == BPF_FUNC_map_lookup_elem ||
 466                 func_id == BPF_FUNC_ringbuf_reserve;
 467 }
 468
 469 static bool is_acquire_function(enum bpf_func_id func_id,
 470                                 const struct bpf_map *map)
 471 {
 472         enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
 473
 474         if (func_id == BPF_FUNC_sk_lookup_tcp ||
 475             func_id == BPF_FUNC_sk_lookup_udp ||
 476             func_id == BPF_FUNC_skc_lookup_tcp ||
 477             func_id == BPF_FUNC_ringbuf_reserve)
 478                 return true;
 479
 480         if (func_id == BPF_FUNC_map_lookup_elem &&
 481             (map_type == BPF_MAP_TYPE_SOCKMAP ||
 482              map_type == BPF_MAP_TYPE_SOCKHASH))
 483                 return true;
 484
 485         return false;
 486 }
 487
 488 static bool is_ptr_cast_function(enum bpf_func_id func_id)
 489 {
 490         return func_id == BPF_FUNC_tcp_sock ||
 491                 func_id == BPF_FUNC_sk_fullsock ||
 492                 func_id == BPF_FUNC_skc_to_tcp_sock ||
 493                 func_id == BPF_FUNC_skc_to_tcp6_sock ||
 494                 func_id == BPF_FUNC_skc_to_udp6_sock ||
 495                 func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
 496                 func_id == BPF_FUNC_skc_to_tcp_request_sock;
 497 }
 498
 499 /* string representation of 'enum bpf_reg_type' */
 500 static const char * const reg_type_str[] = {
 501         [NOT_INIT]              = "?",
 502         [SCALAR_VALUE]          = "inv",
 503         [PTR_TO_CTX]            = "ctx",
 504         [CONST_PTR_TO_MAP]      = "map_ptr",
 505         [PTR_TO_MAP_VALUE]      = "map_value",
 506         [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
 507         [PTR_TO_STACK]          = "fp",
 508         [PTR_TO_PACKET]         = "pkt",
 509         [PTR_TO_PACKET_META]    = "pkt_meta",
 510         [PTR_TO_PACKET_END]     = "pkt_end",
 511         [PTR_TO_FLOW_KEYS]      = "flow_keys",
 512         [PTR_TO_SOCKET]         = "sock",
 513         [PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
 514         [PTR_TO_SOCK_COMMON]    = "sock_common",
 515         [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
 516         [PTR_TO_TCP_SOCK]       = "tcp_sock",
 517         [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
 518         [PTR_TO_TP_BUFFER]      = "tp_buffer",
 519         [PTR_TO_XDP_SOCK]       = "xdp_sock",
 520         [PTR_TO_BTF_ID]         = "ptr_",
 521         [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_",
 522         [PTR_TO_PERCPU_BTF_ID]  = "percpu_ptr_",
 523         [PTR_TO_MEM]            = "mem",
 524         [PTR_TO_MEM_OR_NULL]    = "mem_or_null",
 525         [PTR_TO_RDONLY_BUF]     = "rdonly_buf",
 526         [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
 527         [PTR_TO_RDWR_BUF]       = "rdwr_buf",
 528         [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
 529 };
 530
 531 static char slot_type_char[] = {
 532         [STACK_INVALID] = '?',
 533         [STACK_SPILL]   = 'r',
 534         [STACK_MISC]    = 'm',
 535         [STACK_ZERO]    = '0',
 536 };
 537
 538 static void print_liveness(struct bpf_verifier_env *env,
 539                            enum bpf_reg_liveness live)
 540 {
 541         if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
 542             verbose(env, "_");
 543         if (live & REG_LIVE_READ)
 544                 verbose(env, "r");
 545         if (live & REG_LIVE_WRITTEN)
 546                 verbose(env, "w");
 547         if (live & REG_LIVE_DONE)
 548                 verbose(env, "D");
 549 }
 550
 551 static struct bpf_func_state *func(struct bpf_verifier_env *env,
 552                                    const struct bpf_reg_state *reg)
 553 {
 554         struct bpf_verifier_state *cur = env->cur_state;
 555
 556         return cur->frame[reg->frameno];
 557 }
 558
 559 const char *kernel_type_name(u32 id)
 560 {
 561         return btf_name_by_offset(btf_vmlinux,
 562                                   btf_type_by_id(btf_vmlinux, id)->name_off);
 563 }
 564
 565 static void print_verifier_state(struct bpf_verifier_env *env,
 566                                  const struct bpf_func_state *state)
 567 {
 568         const struct bpf_reg_state *reg;
 569         enum bpf_reg_type t;
 570         int i;
 571
 572         if (state->frameno)
 573                 verbose(env, " frame%d:", state->frameno);
 574         for (i = 0; i < MAX_BPF_REG; i++) {
 575                 reg = &state->regs[i];
 576                 t = reg->type;
 577                 if (t == NOT_INIT)
 578                         continue;
 579                 verbose(env, " R%d", i);
 580                 print_liveness(env, reg->live);
 581                 verbose(env, "=%s", reg_type_str[t]);
 582                 if (t == SCALAR_VALUE && reg->precise)
 583                         verbose(env, "P");
 584                 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
 585                     tnum_is_const(reg->var_off)) {
 586                         /* reg->off should be 0 for SCALAR_VALUE */
 587                         verbose(env, "%lld", reg->var_off.value + reg->off);
 588                 } else {
 589                         if (t == PTR_TO_BTF_ID ||
 590                             t == PTR_TO_BTF_ID_OR_NULL ||
 591                             t == PTR_TO_PERCPU_BTF_ID)
 592                                 verbose(env, "%s", kernel_type_name(reg->btf_id));
 593                         verbose(env, "(id=%d", reg->id);
 594                         if (reg_type_may_be_refcounted_or_null(t))
 595                                 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
 596                         if (t != SCALAR_VALUE)
 597                                 verbose(env, ",off=%d", reg->off);
 598                         if (type_is_pkt_pointer(t))
 599                                 verbose(env, ",r=%d", reg->range);
 600                         else if (t == CONST_PTR_TO_MAP ||
 601                                  t == PTR_TO_MAP_VALUE ||
 602                                  t == PTR_TO_MAP_VALUE_OR_NULL)
 603                                 verbose(env, ",ks=%d,vs=%d",
 604                                         reg->map_ptr->key_size,
 605                                         reg->map_ptr->value_size);
 606                         if (tnum_is_const(reg->var_off)) {
 607                                 /* Typically an immediate SCALAR_VALUE, but
 608                                  * could be a pointer whose offset is too big
 609                                  * for reg->off
 610                                  */
 611                                 verbose(env, ",imm=%llx", reg->var_off.value);
 612                         } else {
 613                                 if (reg->smin_value != reg->umin_value &&
 614                                     reg->smin_value != S64_MIN)
 615                                         verbose(env, ",smin_value=%lld",
 616                                                 (long long)reg->smin_value);
 617                                 if (reg->smax_value != reg->umax_value &&
 618                                     reg->smax_value != S64_MAX)
 619                                         verbose(env, ",smax_value=%lld",
 620                                                 (long long)reg->smax_value);
 621                                 if (reg->umin_value != 0)
 622                                         verbose(env, ",umin_value=%llu",
 623                                                 (unsigned long long)reg->umin_value);
 624                                 if (reg->umax_value != U64_MAX)
 625                                         verbose(env, ",umax_value=%llu",
 626                                                 (unsigned long long)reg->umax_value);
 627                                 if (!tnum_is_unknown(reg->var_off)) {
 628                                         char tn_buf[48];
 629
 630                                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
 631                                         verbose(env, ",var_off=%s", tn_buf);
 632                                 }
 633                                 if (reg->s32_min_value != reg->smin_value &&
 634                                     reg->s32_min_value != S32_MIN)
 635                                         verbose(env, ",s32_min_value=%d",
 636                                                 (int)(reg->s32_min_value));
 637                                 if (reg->s32_max_value != reg->smax_value &&
 638                                     reg->s32_max_value != S32_MAX)
 639                                         verbose(env, ",s32_max_value=%d",
 640                                                 (int)(reg->s32_max_value));
 641                                 if (reg->u32_min_value != reg->umin_value &&
 642                                     reg->u32_min_value != U32_MIN)
 643                                         verbose(env, ",u32_min_value=%d",
 644                                                 (int)(reg->u32_min_value));
 645                                 if (reg->u32_max_value != reg->umax_value &&
 646                                     reg->u32_max_value != U32_MAX)
 647                                         verbose(env, ",u32_max_value=%d",
 648                                                 (int)(reg->u32_max_value));
 649                         }
 650                         verbose(env, ")");
 651                 }
 652         }
 653         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
 654                 char types_buf[BPF_REG_SIZE + 1];
 655                 bool valid = false;
 656                 int j;
 657
 658                 for (j = 0; j < BPF_REG_SIZE; j++) {
 659                         if (state->stack[i].slot_type[j] != STACK_INVALID)
 660                                 valid = true;
 661                         types_buf[j] = slot_type_char[
 662                                         state->stack[i].slot_type[j]];
 663                 }
 664                 types_buf[BPF_REG_SIZE] = 0;
 665                 if (!valid)
 666                         continue;
 667                 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
 668                 print_liveness(env, state->stack[i].spilled_ptr.live);
 669                 if (state->stack[i].slot_type[0] == STACK_SPILL) {
 670                         reg = &state->stack[i].spilled_ptr;
 671                         t = reg->type;
 672                         verbose(env, "=%s", reg_type_str[t]);
 673                         if (t == SCALAR_VALUE && reg->precise)
 674                                 verbose(env, "P");
 675                         if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
 676                                 verbose(env, "%lld", reg->var_off.value + reg->off);
 677                 } else {
 678                         verbose(env, "=%s", types_buf);
 679                 }
 680         }
 681         if (state->acquired_refs && state->refs[0].id) {
 682                 verbose(env, " refs=%d", state->refs[0].id);
 683                 for (i = 1; i < state->acquired_refs; i++)
 684                         if (state->refs[i].id)
 685                                 verbose(env, ",%d", state->refs[i].id);
 686         }
 687         verbose(env, "\n");
 688 }
 689
 690 #define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)                         \
 691 static int copy_##NAME##_state(struct bpf_func_state *dst,              \
 692                                const struct bpf_func_state *src)        \
 693 {                                                                       \
 694         if (!src->FIELD)                                                \
 695                 return 0;                                               \
 696         if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {                    \
 697                 /* internal bug, make state invalid to reject the program */ \
 698                 memset(dst, 0, sizeof(*dst));                           \
 699                 return -EFAULT;                                         \
 700         }                                                               \
 701         memcpy(dst->FIELD, src->FIELD,                                  \
 702                sizeof(*src->FIELD) * (src->COUNT / SIZE));              \
 703         return 0;                                                       \
 704 }
 705 /* copy_reference_state() */
 706 COPY_STATE_FN(reference, acquired_refs, refs, 1)
 707 /* copy_stack_state() */
 708 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 709 #undef COPY_STATE_FN
 710
 711 #define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)                      \
 712 static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
 713                                   bool copy_old)                        \
 714 {                                                                       \
 715         u32 old_size = state->COUNT;                                    \
 716         struct bpf_##NAME##_state *new_##FIELD;                         \
 717         int slot = size / SIZE;                                         \
 718                                                                         \
 719         if (size <= old_size || !size) {                                \
 720                 if (copy_old)                                           \
 721                         return 0;                                       \
 722                 state->COUNT = slot * SIZE;                             \
 723                 if (!size && old_size) {                                \
 724                         kfree(state->FIELD);                            \
 725                         state->FIELD = NULL;                            \
 726                 }                                                       \
 727                 return 0;                                               \
 728         }                                                               \
 729         new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
 730                                     GFP_KERNEL);                        \
 731         if (!new_##FIELD)                                               \
 732                 return -ENOMEM;                                         \
 733         if (copy_old) {                                                 \
 734                 if (state->FIELD)                                       \
 735                         memcpy(new_##FIELD, state->FIELD,               \
 736                                sizeof(*new_##FIELD) * (old_size / SIZE)); \
 737                 memset(new_##FIELD + old_size / SIZE, 0,                \
 738                        sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
 739         }                                                               \
 740         state->COUNT = slot * SIZE;                                     \
 741         kfree(state->FIELD);                                            \
 742         state->FIELD = new_##FIELD;                                     \
 743         return 0;                                                       \
 744 }
 745 /* realloc_reference_state() */
 746 REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
 747 /* realloc_stack_state() */
 748 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
 749 #undef REALLOC_STATE_FN
 750
 751 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 752  * make it consume minimal amount of memory. check_stack_write() access from
 753  * the program calls into realloc_func_state() to grow the stack size.
 754  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 755  * which realloc_stack_state() copies over. It points to previous
 756  * bpf_verifier_state which is never reallocated.
 757  */
 758 static int realloc_func_state(struct bpf_func_state *state, int stack_size,
 759                               int refs_size, bool copy_old)
 760 {
 761         int err = realloc_reference_state(state, refs_size, copy_old);
 762         if (err)
 763                 return err;
 764         return realloc_stack_state(state, stack_size, copy_old);
 765 }
 766
 767 /* Acquire a pointer id from the env and update the state->refs to include
 768  * this new pointer reference.
 769  * On success, returns a valid pointer id to associate with the register
 770  * On failure, returns a negative errno.
 771  */
 772 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
 773 {
 774         struct bpf_func_state *state = cur_func(env);
 775         int new_ofs = state->acquired_refs;
 776         int id, err;
 777
 778         err = realloc_reference_state(state, state->acquired_refs + 1, true);
 779         if (err)
 780                 return err;
 781         id = ++env->id_gen;
 782         state->refs[new_ofs].id = id;
 783         state->refs[new_ofs].insn_idx = insn_idx;
 784
 785         return id;
 786 }
 787
 788 /* release function corresponding to acquire_reference_state(). Idempotent. */
 789 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
 790 {
 791         int i, last_idx;
 792
 793         last_idx = state->acquired_refs - 1;
 794         for (i = 0; i < state->acquired_refs; i++) {
 795                 if (state->refs[i].id == ptr_id) {
 796                         if (last_idx && i != last_idx)
 797                                 memcpy(&state->refs[i], &state->refs[last_idx],
 798                                        sizeof(*state->refs));
 799                         memset(&state->refs[last_idx], 0, sizeof(*state->refs));
 800                         state->acquired_refs--;
 801                         return 0;
 802                 }
 803         }
 804         return -EINVAL;
 805 }
 806
 807 static int transfer_reference_state(struct bpf_func_state *dst,
 808                                     struct bpf_func_state *src)
 809 {
 810         int err = realloc_reference_state(dst, src->acquired_refs, false);
 811         if (err)
 812                 return err;
 813         err = copy_reference_state(dst, src);
 814         if (err)
 815                 return err;
 816         return 0;
 817 }
 818
 819 static void free_func_state(struct bpf_func_state *state)
 820 {
 821         if (!state)
 822                 return;
 823         kfree(state->refs);
 824         kfree(state->stack);
 825         kfree(state);
 826 }
 827
 828 static void clear_jmp_history(struct bpf_verifier_state *state)
 829 {
 830         kfree(state->jmp_history);
 831         state->jmp_history = NULL;
 832         state->jmp_history_cnt = 0;
 833 }
 834
 835 static void free_verifier_state(struct bpf_verifier_state *state,
 836                                 bool free_self)
 837 {
 838         int i;
 839
 840         for (i = 0; i <= state->curframe; i++) {
 841                 free_func_state(state->frame[i]);
 842                 state->frame[i] = NULL;
 843         }
 844         clear_jmp_history(state);
 845         if (free_self)
 846                 kfree(state);
 847 }
 848
 849 /* copy verifier state from src to dst growing dst stack space
 850  * when necessary to accommodate larger src stack
 851  */
 852 static int copy_func_state(struct bpf_func_state *dst,
 853                            const struct bpf_func_state *src)
 854 {
 855         int err;
 856
 857         err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
 858                                  false);
 859         if (err)
 860                 return err;
 861         memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
 862         err = copy_reference_state(dst, src);
 863         if (err)
 864                 return err;
 865         return copy_stack_state(dst, src);
 866 }
 867
 868 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 869                                const struct bpf_verifier_state *src)
 870 {
 871         struct bpf_func_state *dst;
 872         u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
 873         int i, err;
 874
 875         if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
 876                 kfree(dst_state->jmp_history);
 877                 dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
 878                 if (!dst_state->jmp_history)
 879                         return -ENOMEM;
 880         }
 881         memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
 882         dst_state->jmp_history_cnt = src->jmp_history_cnt;
 883
 884         /* if dst has more stack frames then src frame, free them */
 885         for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
 886                 free_func_state(dst_state->frame[i]);
 887                 dst_state->frame[i] = NULL;
 888         }
 889         dst_state->speculative = src->speculative;
 890         dst_state->curframe = src->curframe;
 891         dst_state->active_spin_lock = src->active_spin_lock;
 892         dst_state->branches = src->branches;
 893         dst_state->parent = src->parent;
 894         dst_state->first_insn_idx = src->first_insn_idx;
 895         dst_state->last_insn_idx = src->last_insn_idx;
 896         for (i = 0; i <= src->curframe; i++) {
 897                 dst = dst_state->frame[i];
 898                 if (!dst) {
 899                         dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 900                         if (!dst)
 901                                 return -ENOMEM;
 902                         dst_state->frame[i] = dst;
 903                 }
 904                 err = copy_func_state(dst, src->frame[i]);
 905                 if (err)
 906                         return err;
 907         }
 908         return 0;
 909 }
 910
 911 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 912 {
 913         while (st) {
 914                 u32 br = --st->branches;
 915
 916                 /* WARN_ON(br > 1) technically makes sense here,
 917                  * but see comment in push_stack(), hence:
 918                  */
 919                 WARN_ONCE((int)br < 0,
 920                           "BUG update_branch_counts:branches_to_explore=%d\n",
 921                           br);
 922                 if (br)
 923                         break;
 924                 st = st->parent;
 925         }
 926 }
 927
 928 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 929                      int *insn_idx, bool pop_log)
 930 {
 931         struct bpf_verifier_state *cur = env->cur_state;
 932         struct bpf_verifier_stack_elem *elem, *head = env->head;
 933         int err;
 934
 935         if (env->head == NULL)
 936                 return -ENOENT;
 937
 938         if (cur) {
 939                 err = copy_verifier_state(cur, &head->st);
 940                 if (err)
 941                         return err;
 942         }
 943         if (pop_log)
 944                 bpf_vlog_reset(&env->log, head->log_pos);
 945         if (insn_idx)
 946                 *insn_idx = head->insn_idx;
 947         if (prev_insn_idx)
 948                 *prev_insn_idx = head->prev_insn_idx;
 949         elem = head->next;
 950         free_verifier_state(&head->st, false);
 951         kfree(head);
 952         env->head = elem;
 953         env->stack_size--;
 954         return 0;
 955 }
 956
 957 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 958                                              int insn_idx, int prev_insn_idx,
 959                                              bool speculative)
 960 {
 961         struct bpf_verifier_state *cur = env->cur_state;
 962         struct bpf_verifier_stack_elem *elem;
 963         int err;
 964
 965         elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
 966         if (!elem)
 967                 goto err;
 968
 969         elem->insn_idx = insn_idx;
 970         elem->prev_insn_idx = prev_insn_idx;
 971         elem->next = env->head;
 972         elem->log_pos = env->log.len_used;
 973         env->head = elem;
 974         env->stack_size++;
 975         err = copy_verifier_state(&elem->st, cur);
 976         if (err)
 977                 goto err;
 978         elem->st.speculative |= speculative;
 979         if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
 980                 verbose(env, "The sequence of %d jumps is too complex.\n",
 981                         env->stack_size);
 982                 goto err;
 983         }
 984         if (elem->st.parent) {
 985                 ++elem->st.parent->branches;
 986                 /* WARN_ON(branches > 2) technically makes sense here,
 987                  * but
 988                  * 1. speculative states will bump 'branches' for non-branch
 989                  * instructions
 990                  * 2. is_state_visited() heuristics may decide not to create
 991                  * a new state for a sequence of branches and all such current
 992                  * and cloned states will be pointing to a single parent state
 993                  * which might have large 'branches' count.
 994                  */
 995         }
 996         return &elem->st;
 997 err:
 998         free_verifier_state(env->cur_state, true);
 999         env->cur_state = NULL;
1000         /* pop all elements and return */
1001         while (!pop_stack(env, NULL, NULL, false));
1002         return NULL;
1003 }
1004
1005 #define CALLER_SAVED_REGS 6
1006 static const int caller_saved[CALLER_SAVED_REGS] = {
1007         BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1008 };
1009
1010 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1011                                 struct bpf_reg_state *reg);
1012
1013 /* This helper doesn't clear reg->id */
1014 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1015 {
1016         reg->var_off = tnum_const(imm);
1017         reg->smin_value = (s64)imm;
1018         reg->smax_value = (s64)imm;
1019         reg->umin_value = imm;
1020         reg->umax_value = imm;
1021
1022         reg->s32_min_value = (s32)imm;
1023         reg->s32_max_value = (s32)imm;
1024         reg->u32_min_value = (u32)imm;
1025         reg->u32_max_value = (u32)imm;
1026 }
1027
1028 /* Mark the unknown part of a register (variable offset or scalar value) as
1029  * known to have the value @imm.
1030  */
1031 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1032 {
1033         /* Clear id, off, and union(map_ptr, range) */
1034         memset(((u8 *)reg) + sizeof(reg->type), 0,
1035                offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1036         ___mark_reg_known(reg, imm);
1037 }
1038
1039 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1040 {
1041         reg->var_off = tnum_const_subreg(reg->var_off, imm);
1042         reg->s32_min_value = (s32)imm;
1043         reg->s32_max_value = (s32)imm;
1044         reg->u32_min_value = (u32)imm;
1045         reg->u32_max_value = (u32)imm;
1046 }
1047
1048 /* Mark the 'variable offset' part of a register as zero.  This should be
1049  * used only on registers holding a pointer type.
1050  */
1051 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1052 {
1053         __mark_reg_known(reg, 0);
1054 }
1055
1056 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1057 {
1058         __mark_reg_known(reg, 0);
1059         reg->type = SCALAR_VALUE;
1060 }
1061
1062 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1063                                 struct bpf_reg_state *regs, u32 regno)
1064 {
1065         if (WARN_ON(regno >= MAX_BPF_REG)) {
1066                 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1067                 /* Something bad happened, let's kill all regs */
1068                 for (regno = 0; regno < MAX_BPF_REG; regno++)
1069                         __mark_reg_not_init(env, regs + regno);
1070                 return;
1071         }
1072         __mark_reg_known_zero(regs + regno);
1073 }
1074
1075 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1076 {
1077         return type_is_pkt_pointer(reg->type);
1078 }
1079
1080 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1081 {
1082         return reg_is_pkt_pointer(reg) ||
1083                reg->type == PTR_TO_PACKET_END;
1084 }
1085
1086 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1087 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1088                                     enum bpf_reg_type which)
1089 {
1090         /* The register can already have a range from prior markings.
1091          * This is fine as long as it hasn't been advanced from its
1092          * origin.
1093          */
1094         return reg->type == which &&
1095                reg->id == 0 &&
1096                reg->off == 0 &&
1097                tnum_equals_const(reg->var_off, 0);
1098 }
1099
1100 /* Reset the min/max bounds of a register */
1101 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1102 {
1103         reg->smin_value = S64_MIN;
1104         reg->smax_value = S64_MAX;
1105         reg->umin_value = 0;
1106         reg->umax_value = U64_MAX;
1107
1108         reg->s32_min_value = S32_MIN;
1109         reg->s32_max_value = S32_MAX;
1110         reg->u32_min_value = 0;
1111         reg->u32_max_value = U32_MAX;
1112 }
1113
1114 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1115 {
1116         reg->smin_value = S64_MIN;
1117         reg->smax_value = S64_MAX;
1118         reg->umin_value = 0;
1119         reg->umax_value = U64_MAX;
1120 }
1121
1122 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1123 {
1124         reg->s32_min_value = S32_MIN;
1125         reg->s32_max_value = S32_MAX;
1126         reg->u32_min_value = 0;
1127         reg->u32_max_value = U32_MAX;
1128 }
1129
1130 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1131 {
1132         struct tnum var32_off = tnum_subreg(reg->var_off);
1133
1134         /* min signed is max(sign bit) | min(other bits) */
1135         reg->s32_min_value = max_t(s32, reg->s32_min_value,
1136                         var32_off.value | (var32_off.mask & S32_MIN));
1137         /* max signed is min(sign bit) | max(other bits) */
1138         reg->s32_max_value = min_t(s32, reg->s32_max_value,
1139                         var32_off.value | (var32_off.mask & S32_MAX));
1140         reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1141         reg->u32_max_value = min(reg->u32_max_value,
1142                                  (u32)(var32_off.value | var32_off.mask));
1143 }
1144
1145 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1146 {
1147         /* min signed is max(sign bit) | min(other bits) */
1148         reg->smin_value = max_t(s64, reg->smin_value,
1149                                 reg->var_off.value | (reg->var_off.mask & S64_MIN));
1150         /* max signed is min(sign bit) | max(other bits) */
1151         reg->smax_value = min_t(s64, reg->smax_value,
1152                                 reg->var_off.value | (reg->var_off.mask & S64_MAX));
1153         reg->umin_value = max(reg->umin_value, reg->var_off.value);
1154         reg->umax_value = min(reg->umax_value,
1155                               reg->var_off.value | reg->var_off.mask);
1156 }
1157
1158 static void __update_reg_bounds(struct bpf_reg_state *reg)
1159 {
1160         __update_reg32_bounds(reg);
1161         __update_reg64_bounds(reg);
1162 }
1163
1164 /* Uses signed min/max values to inform unsigned, and vice-versa */
1165 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1166 {
1167         /* Learn sign from signed bounds.
1168          * If we cannot cross the sign boundary, then signed and unsigned bounds
1169          * are the same, so combine.  This works even in the negative case, e.g.
1170          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1171          */
1172         if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1173                 reg->s32_min_value = reg->u32_min_value =
1174                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1175                 reg->s32_max_value = reg->u32_max_value =
1176                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1177                 return;
1178         }
1179         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1180          * boundary, so we must be careful.
1181          */
1182         if ((s32)reg->u32_max_value >= 0) {
1183                 /* Positive.  We can't learn anything from the smin, but smax
1184                  * is positive, hence safe.
1185                  */
1186                 reg->s32_min_value = reg->u32_min_value;
1187                 reg->s32_max_value = reg->u32_max_value =
1188                         min_t(u32, reg->s32_max_value, reg->u32_max_value);
1189         } else if ((s32)reg->u32_min_value < 0) {
1190                 /* Negative.  We can't learn anything from the smax, but smin
1191                  * is negative, hence safe.
1192                  */
1193                 reg->s32_min_value = reg->u32_min_value =
1194                         max_t(u32, reg->s32_min_value, reg->u32_min_value);
1195                 reg->s32_max_value = reg->u32_max_value;
1196         }
1197 }
1198
1199 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1200 {
1201         /* Learn sign from signed bounds.
1202          * If we cannot cross the sign boundary, then signed and unsigned bounds
1203          * are the same, so combine.  This works even in the negative case, e.g.
1204          * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1205          */
1206         if (reg->smin_value >= 0 || reg->smax_value < 0) {
1207                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1208                                                           reg->umin_value);
1209                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1210                                                           reg->umax_value);
1211                 return;
1212         }
1213         /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1214          * boundary, so we must be careful.
1215          */
1216         if ((s64)reg->umax_value >= 0) {
1217                 /* Positive.  We can't learn anything from the smin, but smax
1218                  * is positive, hence safe.
1219                  */
1220                 reg->smin_value = reg->umin_value;
1221                 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1222                                                           reg->umax_value);
1223         } else if ((s64)reg->umin_value < 0) {
1224                 /* Negative.  We can't learn anything from the smax, but smin
1225                  * is negative, hence safe.
1226                  */
1227                 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1228                                                           reg->umin_value);
1229                 reg->smax_value = reg->umax_value;
1230         }
1231 }
1232
1233 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1234 {
1235         __reg32_deduce_bounds(reg);
1236         __reg64_deduce_bounds(reg);
1237 }
1238
1239 /* Attempts to improve var_off based on unsigned min/max information */
1240 static void __reg_bound_offset(struct bpf_reg_state *reg)
1241 {
1242         struct tnum var64_off = tnum_intersect(reg->var_off,
1243                                                tnum_range(reg->umin_value,
1244                                                           reg->umax_value));
1245         struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1246                                                 tnum_range(reg->u32_min_value,
1247                                                            reg->u32_max_value));
1248
1249         reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1250 }
1251
1252 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1253 {
1254         reg->umin_value = reg->u32_min_value;
1255         reg->umax_value = reg->u32_max_value;
1256         /* Attempt to pull 32-bit signed bounds into 64-bit bounds
1257          * but must be positive otherwise set to worse case bounds
1258          * and refine later from tnum.
1259          */
1260         if (reg->s32_min_value >= 0 && reg->s32_max_value >= 0)
1261                 reg->smax_value = reg->s32_max_value;
1262         else
1263                 reg->smax_value = U32_MAX;
1264         if (reg->s32_min_value >= 0)
1265                 reg->smin_value = reg->s32_min_value;
1266         else
1267                 reg->smin_value = 0;
1268 }
1269
1270 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1271 {
1272         /* special case when 64-bit register has upper 32-bit register
1273          * zeroed. Typically happens after zext or <<32, >>32 sequence
1274          * allowing us to use 32-bit bounds directly,
1275          */
1276         if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1277                 __reg_assign_32_into_64(reg);
1278         } else {
1279                 /* Otherwise the best we can do is push lower 32bit known and
1280                  * unknown bits into register (var_off set from jmp logic)
1281                  * then learn as much as possible from the 64-bit tnum
1282                  * known and unknown bits. The previous smin/smax bounds are
1283                  * invalid here because of jmp32 compare so mark them unknown
1284                  * so they do not impact tnum bounds calculation.
1285                  */
1286                 __mark_reg64_unbounded(reg);
1287                 __update_reg_bounds(reg);
1288         }
1289
1290         /* Intersecting with the old var_off might have improved our bounds
1291          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1292          * then new var_off is (0; 0x7f...fc) which improves our umax.
1293          */
1294         __reg_deduce_bounds(reg);
1295         __reg_bound_offset(reg);
1296         __update_reg_bounds(reg);
1297 }
1298
1299 static bool __reg64_bound_s32(s64 a)
1300 {
1301         return a > S32_MIN && a < S32_MAX;
1302 }
1303
1304 static bool __reg64_bound_u32(u64 a)
1305 {
1306         return a > U32_MIN && a < U32_MAX;
1307 }
1308
1309 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1310 {
1311         __mark_reg32_unbounded(reg);
1312
1313         if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1314                 reg->s32_min_value = (s32)reg->smin_value;
1315                 reg->s32_max_value = (s32)reg->smax_value;
1316         }
1317         if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1318                 reg->u32_min_value = (u32)reg->umin_value;
1319                 reg->u32_max_value = (u32)reg->umax_value;
1320         }
1321
1322         /* Intersecting with the old var_off might have improved our bounds
1323          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1324          * then new var_off is (0; 0x7f...fc) which improves our umax.
1325          */
1326         __reg_deduce_bounds(reg);
1327         __reg_bound_offset(reg);
1328         __update_reg_bounds(reg);
1329 }
1330
1331 /* Mark a register as having a completely unknown (scalar) value. */
1332 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1333                                struct bpf_reg_state *reg)
1334 {
1335         /*
1336          * Clear type, id, off, and union(map_ptr, range) and
1337          * padding between 'type' and union
1338          */
1339         memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1340         reg->type = SCALAR_VALUE;
1341         reg->var_off = tnum_unknown;
1342         reg->frameno = 0;
1343         reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1344         __mark_reg_unbounded(reg);
1345 }
1346
1347 static void mark_reg_unknown(struct bpf_verifier_env *env,
1348                              struct bpf_reg_state *regs, u32 regno)
1349 {
1350         if (WARN_ON(regno >= MAX_BPF_REG)) {
1351                 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1352                 /* Something bad happened, let's kill all regs except FP */
1353                 for (regno = 0; regno < BPF_REG_FP; regno++)
1354                         __mark_reg_not_init(env, regs + regno);
1355                 return;
1356         }
1357         __mark_reg_unknown(env, regs + regno);
1358 }
1359
1360 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1361                                 struct bpf_reg_state *reg)
1362 {
1363         __mark_reg_unknown(env, reg);
1364         reg->type = NOT_INIT;
1365 }
1366
1367 static void mark_reg_not_init(struct bpf_verifier_env *env,
1368                               struct bpf_reg_state *regs, u32 regno)
1369 {
1370         if (WARN_ON(regno >= MAX_BPF_REG)) {
1371                 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1372                 /* Something bad happened, let's kill all regs except FP */
1373                 for (regno = 0; regno < BPF_REG_FP; regno++)
1374                         __mark_reg_not_init(env, regs + regno);
1375                 return;
1376         }
1377         __mark_reg_not_init(env, regs + regno);
1378 }
1379
1380 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1381                             struct bpf_reg_state *regs, u32 regno,
1382                             enum bpf_reg_type reg_type, u32 btf_id)
1383 {
1384         if (reg_type == SCALAR_VALUE) {
1385                 mark_reg_unknown(env, regs, regno);
1386                 return;
1387         }
1388         mark_reg_known_zero(env, regs, regno);
1389         regs[regno].type = PTR_TO_BTF_ID;
1390         regs[regno].btf_id = btf_id;
1391 }
1392
1393 #define DEF_NOT_SUBREG  (0)
1394 static void init_reg_state(struct bpf_verifier_env *env,
1395                            struct bpf_func_state *state)
1396 {
1397         struct bpf_reg_state *regs = state->regs;
1398         int i;
1399
1400         for (i = 0; i < MAX_BPF_REG; i++) {
1401                 mark_reg_not_init(env, regs, i);
1402                 regs[i].live = REG_LIVE_NONE;
1403                 regs[i].parent = NULL;
1404                 regs[i].subreg_def = DEF_NOT_SUBREG;
1405         }
1406
1407         /* frame pointer */
1408         regs[BPF_REG_FP].type = PTR_TO_STACK;
1409         mark_reg_known_zero(env, regs, BPF_REG_FP);
1410         regs[BPF_REG_FP].frameno = state->frameno;
1411 }
1412
1413 #define BPF_MAIN_FUNC (-1)
1414 static void init_func_state(struct bpf_verifier_env *env,
1415                             struct bpf_func_state *state,
1416                             int callsite, int frameno, int subprogno)
1417 {
1418         state->callsite = callsite;
1419         state->frameno = frameno;
1420         state->subprogno = subprogno;
1421         init_reg_state(env, state);
1422 }
1423
1424 enum reg_arg_type {
1425         SRC_OP,         /* register is used as source operand */
1426         DST_OP,         /* register is used as destination operand */
1427         DST_OP_NO_MARK  /* same as above, check only, don't mark */
1428 };
1429
1430 static int cmp_subprogs(const void *a, const void *b)
1431 {
1432         return ((struct bpf_subprog_info *)a)->start -
1433                ((struct bpf_subprog_info *)b)->start;
1434 }
1435
1436 static int find_subprog(struct bpf_verifier_env *env, int off)
1437 {
1438         struct bpf_subprog_info *p;
1439
1440         p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1441                     sizeof(env->subprog_info[0]), cmp_subprogs);
1442         if (!p)
1443                 return -ENOENT;
1444         return p - env->subprog_info;
1445
1446 }
1447
1448 static int add_subprog(struct bpf_verifier_env *env, int off)
1449 {
1450         int insn_cnt = env->prog->len;
1451         int ret;
1452
1453         if (off >= insn_cnt || off < 0) {
1454                 verbose(env, "call to invalid destination\n");
1455                 return -EINVAL;
1456         }
1457         ret = find_subprog(env, off);
1458         if (ret >= 0)
1459                 return 0;
1460         if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1461                 verbose(env, "too many subprograms\n");
1462                 return -E2BIG;
1463         }
1464         env->subprog_info[env->subprog_cnt++].start = off;
1465         sort(env->subprog_info, env->subprog_cnt,
1466              sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1467         return 0;
1468 }
1469
1470 static int check_subprogs(struct bpf_verifier_env *env)
1471 {
1472         int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1473         struct bpf_subprog_info *subprog = env->subprog_info;
1474         struct bpf_insn *insn = env->prog->insnsi;
1475         int insn_cnt = env->prog->len;
1476
1477         /* Add entry function. */
1478         ret = add_subprog(env, 0);
1479         if (ret < 0)
1480                 return ret;
1481
1482         /* determine subprog starts. The end is one before the next starts */
1483         for (i = 0; i < insn_cnt; i++) {
1484                 if (insn[i].code != (BPF_JMP | BPF_CALL))
1485                         continue;
1486                 if (insn[i].src_reg != BPF_PSEUDO_CALL)
1487                         continue;
1488                 if (!env->bpf_capable) {
1489                         verbose(env,
1490                                 "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
1491                         return -EPERM;
1492                 }
1493                 ret = add_subprog(env, i + insn[i].imm + 1);
1494                 if (ret < 0)
1495                         return ret;
1496         }
1497
1498         /* Add a fake 'exit' subprog which could simplify subprog iteration
1499          * logic. 'subprog_cnt' should not be increased.
1500          */
1501         subprog[env->subprog_cnt].start = insn_cnt;
1502
1503         if (env->log.level & BPF_LOG_LEVEL2)
1504                 for (i = 0; i < env->subprog_cnt; i++)
1505                         verbose(env, "func#%d @%d\n", i, subprog[i].start);
1506
1507         /* now check that all jumps are within the same subprog */
1508         subprog_start = subprog[cur_subprog].start;
1509         subprog_end = subprog[cur_subprog + 1].start;
1510         for (i = 0; i < insn_cnt; i++) {
1511                 u8 code = insn[i].code;
1512
1513                 if (code == (BPF_JMP | BPF_CALL) &&
1514                     insn[i].imm == BPF_FUNC_tail_call &&
1515                     insn[i].src_reg != BPF_PSEUDO_CALL)
1516                         subprog[cur_subprog].has_tail_call = true;
1517                 if (BPF_CLASS(code) == BPF_LD &&
1518                     (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
1519                         subprog[cur_subprog].has_ld_abs = true;
1520                 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
1521                         goto next;
1522                 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1523                         goto next;
1524                 off = i + insn[i].off + 1;
1525                 if (off < subprog_start || off >= subprog_end) {
1526                         verbose(env, "jump out of range from insn %d to %d\n", i, off);
1527                         return -EINVAL;
1528                 }
1529 next:
1530                 if (i == subprog_end - 1) {
1531                         /* to avoid fall-through from one subprog into another
1532                          * the last insn of the subprog should be either exit
1533                          * or unconditional jump back
1534                          */
1535                         if (code != (BPF_JMP | BPF_EXIT) &&
1536                             code != (BPF_JMP | BPF_JA)) {
1537                                 verbose(env, "last insn is not an exit or jmp\n");
1538                                 return -EINVAL;
1539                         }
1540                         subprog_start = subprog_end;
1541                         cur_subprog++;
1542                         if (cur_subprog < env->subprog_cnt)
1543                                 subprog_end = subprog[cur_subprog + 1].start;
1544                 }
1545         }
1546         return 0;
1547 }
1548
1549 /* Parentage chain of this register (or stack slot) should take care of all
1550  * issues like callee-saved registers, stack slot allocation time, etc.
1551  */
1552 static int mark_reg_read(struct bpf_verifier_env *env,
1553                          const struct bpf_reg_state *state,
1554                          struct bpf_reg_state *parent, u8 flag)
1555 {
1556         bool writes = parent == state->parent; /* Observe write marks */
1557         int cnt = 0;
1558
1559         while (parent) {
1560                 /* if read wasn't screened by an earlier write ... */
1561                 if (writes && state->live & REG_LIVE_WRITTEN)
1562                         break;
1563                 if (parent->live & REG_LIVE_DONE) {
1564                         verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1565                                 reg_type_str[parent->type],
1566                                 parent->var_off.value, parent->off);
1567                         return -EFAULT;
1568                 }
1569                 /* The first condition is more likely to be true than the
1570                  * second, checked it first.
1571                  */
1572                 if ((parent->live & REG_LIVE_READ) == flag ||
1573                     parent->live & REG_LIVE_READ64)
1574                         /* The parentage chain never changes and
1575                          * this parent was already marked as LIVE_READ.
1576                          * There is no need to keep walking the chain again and
1577                          * keep re-marking all parents as LIVE_READ.
1578                          * This case happens when the same register is read
1579                          * multiple times without writes into it in-between.
1580                          * Also, if parent has the stronger REG_LIVE_READ64 set,
1581                          * then no need to set the weak REG_LIVE_READ32.
1582                          */
1583                         break;
1584                 /* ... then we depend on parent's value */
1585                 parent->live |= flag;
1586                 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1587                 if (flag == REG_LIVE_READ64)
1588                         parent->live &= ~REG_LIVE_READ32;
1589                 state = parent;
1590                 parent = state->parent;
1591                 writes = true;
1592                 cnt++;
1593         }
1594
1595         if (env->longest_mark_read_walk < cnt)
1596                 env->longest_mark_read_walk = cnt;
1597         return 0;
1598 }
1599
1600 /* This function is supposed to be used by the following 32-bit optimization
1601  * code only. It returns TRUE if the source or destination register operates
1602  * on 64-bit, otherwise return FALSE.
1603  */
1604 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
1605                      u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
1606 {
1607         u8 code, class, op;
1608
1609         code = insn->code;
1610         class = BPF_CLASS(code);
1611         op = BPF_OP(code);
1612         if (class == BPF_JMP) {
1613                 /* BPF_EXIT for "main" will reach here. Return TRUE
1614                  * conservatively.
1615                  */
1616                 if (op == BPF_EXIT)
1617                         return true;
1618                 if (op == BPF_CALL) {
1619                         /* BPF to BPF call will reach here because of marking
1620                          * caller saved clobber with DST_OP_NO_MARK for which we
1621                          * don't care the register def because they are anyway
1622                          * marked as NOT_INIT already.
1623                          */
1624                         if (insn->src_reg == BPF_PSEUDO_CALL)
1625                                 return false;
1626                         /* Helper call will reach here because of arg type
1627                          * check, conservatively return TRUE.
1628                          */
1629                         if (t == SRC_OP)
1630                                 return true;
1631
1632                         return false;
1633                 }
1634         }
1635
1636         if (class == BPF_ALU64 || class == BPF_JMP ||
1637             /* BPF_END always use BPF_ALU class. */
1638             (class == BPF_ALU && op == BPF_END && insn->imm == 64))
1639                 return true;
1640
1641         if (class == BPF_ALU || class == BPF_JMP32)
1642                 return false;
1643
1644         if (class == BPF_LDX) {
1645                 if (t != SRC_OP)
1646                         return BPF_SIZE(code) == BPF_DW;
1647                 /* LDX source must be ptr. */
1648                 return true;
1649         }
1650
1651         if (class == BPF_STX) {
1652                 if (reg->type != SCALAR_VALUE)
1653                         return true;
1654                 return BPF_SIZE(code) == BPF_DW;
1655         }
1656
1657         if (class == BPF_LD) {
1658                 u8 mode = BPF_MODE(code);
1659
1660                 /* LD_IMM64 */
1661                 if (mode == BPF_IMM)
1662                         return true;
1663
1664                 /* Both LD_IND and LD_ABS return 32-bit data. */
1665                 if (t != SRC_OP)
1666                         return  false;
1667
1668                 /* Implicit ctx ptr. */
1669                 if (regno == BPF_REG_6)
1670                         return true;
1671
1672                 /* Explicit source could be any width. */
1673                 return true;
1674         }
1675
1676         if (class == BPF_ST)
1677                 /* The only source register for BPF_ST is a ptr. */
1678                 return true;
1679
1680         /* Conservatively return true at default. */
1681         return true;
1682 }
1683
1684 /* Return TRUE if INSN doesn't have explicit value define. */
1685 static bool insn_no_def(struct bpf_insn *insn)
1686 {
1687         u8 class = BPF_CLASS(insn->code);
1688
1689         return (class == BPF_JMP || class == BPF_JMP32 ||
1690                 class == BPF_STX || class == BPF_ST);
1691 }
1692
1693 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
1694 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1695 {
1696         if (insn_no_def(insn))
1697                 return false;
1698
1699         return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1700 }
1701
1702 static void mark_insn_zext(struct bpf_verifier_env *env,
1703                            struct bpf_reg_state *reg)
1704 {
1705         s32 def_idx = reg->subreg_def;
1706
1707         if (def_idx == DEF_NOT_SUBREG)
1708                 return;
1709
1710         env->insn_aux_data[def_idx - 1].zext_dst = true;
1711         /* The dst will be zero extended, so won't be sub-register anymore. */
1712         reg->subreg_def = DEF_NOT_SUBREG;
1713 }
1714
1715 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1716                          enum reg_arg_type t)
1717 {
1718         struct bpf_verifier_state *vstate = env->cur_state;
1719         struct bpf_func_state *state = vstate->frame[vstate->curframe];
1720         struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1721         struct bpf_reg_state *reg, *regs = state->regs;
1722         bool rw64;
1723
1724         if (regno >= MAX_BPF_REG) {
1725                 verbose(env, "R%d is invalid\n", regno);
1726                 return -EINVAL;
1727         }
1728
1729         reg = &regs[regno];
1730         rw64 = is_reg64(env, insn, regno, reg, t);
1731         if (t == SRC_OP) {
1732                 /* check whether register used as source operand can be read */
1733                 if (reg->type == NOT_INIT) {
1734                         verbose(env, "R%d !read_ok\n", regno);
1735                         return -EACCES;
1736                 }
1737                 /* We don't need to worry about FP liveness because it's read-only */
1738                 if (regno == BPF_REG_FP)
1739                         return 0;
1740
1741                 if (rw64)
1742                         mark_insn_zext(env, reg);
1743
1744                 return mark_reg_read(env, reg, reg->parent,
1745                                      rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
1746         } else {
1747                 /* check whether register used as dest operand can be written to */
1748                 if (regno == BPF_REG_FP) {
1749                         verbose(env, "frame pointer is read only\n");
1750                         return -EACCES;
1751                 }
1752                 reg->live |= REG_LIVE_WRITTEN;
1753                 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
1754                 if (t == DST_OP)
1755                         mark_reg_unknown(env, regs, regno);
1756         }
1757         return 0;
1758 }
1759
1760 /* for any branch, call, exit record the history of jmps in the given state */
1761 static int push_jmp_history(struct bpf_verifier_env *env,
1762                             struct bpf_verifier_state *cur)
1763 {
1764         u32 cnt = cur->jmp_history_cnt;
1765         struct bpf_idx_pair *p;
1766
1767         cnt++;
1768         p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1769         if (!p)
1770                 return -ENOMEM;
1771         p[cnt - 1].idx = env->insn_idx;
1772         p[cnt - 1].prev_idx = env->prev_insn_idx;
1773         cur->jmp_history = p;
1774         cur->jmp_history_cnt = cnt;
1775         return 0;
1776 }
1777
1778 /* Backtrack one insn at a time. If idx is not at the top of recorded
1779  * history then previous instruction came from straight line execution.
1780  */
1781 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
1782                              u32 *history)
1783 {
1784         u32 cnt = *history;
1785
1786         if (cnt && st->jmp_history[cnt - 1].idx == i) {
1787                 i = st->jmp_history[cnt - 1].prev_idx;
1788                 (*history)--;
1789         } else {
1790                 i--;
1791         }
1792         return i;
1793 }
1794
1795 /* For given verifier state backtrack_insn() is called from the last insn to
1796  * the first insn. Its purpose is to compute a bitmask of registers and
1797  * stack slots that needs precision in the parent verifier state.
1798  */
1799 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
1800                           u32 *reg_mask, u64 *stack_mask)
1801 {
1802         const struct bpf_insn_cbs cbs = {
1803                 .cb_print       = verbose,
1804                 .private_data   = env,
1805         };
1806         struct bpf_insn *insn = env->prog->insnsi + idx;
1807         u8 class = BPF_CLASS(insn->code);
1808         u8 opcode = BPF_OP(insn->code);
1809         u8 mode = BPF_MODE(insn->code);
1810         u32 dreg = 1u << insn->dst_reg;
1811         u32 sreg = 1u << insn->src_reg;
1812         u32 spi;
1813
1814         if (insn->code == 0)
1815                 return 0;
1816         if (env->log.level & BPF_LOG_LEVEL) {
1817                 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1818                 verbose(env, "%d: ", idx);
1819                 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1820         }
1821
1822         if (class == BPF_ALU || class == BPF_ALU64) {
1823                 if (!(*reg_mask & dreg))
1824                         return 0;
1825                 if (opcode == BPF_MOV) {
1826                         if (BPF_SRC(insn->code) == BPF_X) {
1827                                 /* dreg = sreg
1828                                  * dreg needs precision after this insn
1829                                  * sreg needs precision before this insn
1830                                  */
1831                                 *reg_mask &= ~dreg;
1832                                 *reg_mask |= sreg;
1833                         } else {
1834                                 /* dreg = K
1835                                  * dreg needs precision after this insn.
1836                                  * Corresponding register is already marked
1837                                  * as precise=true in this verifier state.
1838                                  * No further markings in parent are necessary
1839                                  */
1840                                 *reg_mask &= ~dreg;
1841                         }
1842                 } else {
1843                         if (BPF_SRC(insn->code) == BPF_X) {
1844                                 /* dreg += sreg
1845                                  * both dreg and sreg need precision
1846                                  * before this insn
1847                                  */
1848                                 *reg_mask |= sreg;
1849                         } /* else dreg += K
1850                            * dreg still needs precision before this insn
1851                            */
1852                 }
1853         } else if (class == BPF_LDX) {
1854                 if (!(*reg_mask & dreg))
1855                         return 0;
1856                 *reg_mask &= ~dreg;
1857
1858                 /* scalars can only be spilled into stack w/o losing precision.
1859                  * Load from any other memory can be zero extended.
1860                  * The desire to keep that precision is already indicated
1861                  * by 'precise' mark in corresponding register of this state.
1862                  * No further tracking necessary.
1863                  */
1864                 if (insn->src_reg != BPF_REG_FP)
1865                         return 0;
1866                 if (BPF_SIZE(insn->code) != BPF_DW)
1867                         return 0;
1868
1869                 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1870                  * that [fp - off] slot contains scalar that needs to be
1871                  * tracked with precision
1872                  */
1873                 spi = (-insn->off - 1) / BPF_REG_SIZE;
1874                 if (spi >= 64) {
1875                         verbose(env, "BUG spi %d\n", spi);
1876                         WARN_ONCE(1, "verifier backtracking bug");
1877                         return -EFAULT;
1878                 }
1879                 *stack_mask |= 1ull << spi;
1880         } else if (class == BPF_STX || class == BPF_ST) {
1881                 if (*reg_mask & dreg)
1882                         /* stx & st shouldn't be using _scalar_ dst_reg
1883                          * to access memory. It means backtracking
1884                          * encountered a case of pointer subtraction.
1885                          */
1886                         return -ENOTSUPP;
1887                 /* scalars can only be spilled into stack */
1888                 if (insn->dst_reg != BPF_REG_FP)
1889                         return 0;
1890                 if (BPF_SIZE(insn->code) != BPF_DW)
1891                         return 0;
1892                 spi = (-insn->off - 1) / BPF_REG_SIZE;
1893                 if (spi >= 64) {
1894                         verbose(env, "BUG spi %d\n", spi);
1895                         WARN_ONCE(1, "verifier backtracking bug");
1896                         return -EFAULT;
1897                 }
1898                 if (!(*stack_mask & (1ull << spi)))
1899                         return 0;
1900                 *stack_mask &= ~(1ull << spi);
1901                 if (class == BPF_STX)
1902                         *reg_mask |= sreg;
1903         } else if (class == BPF_JMP || class == BPF_JMP32) {
1904                 if (opcode == BPF_CALL) {
1905                         if (insn->src_reg == BPF_PSEUDO_CALL)
1906                                 return -ENOTSUPP;
1907                         /* regular helper call sets R0 */
1908                         *reg_mask &= ~1;
1909                         if (*reg_mask & 0x3f) {
1910                                 /* if backtracing was looking for registers R1-R5
1911                                  * they should have been found already.
1912                                  */
1913                                 verbose(env, "BUG regs %x\n", *reg_mask);
1914                                 WARN_ONCE(1, "verifier backtracking bug");
1915                                 return -EFAULT;
1916                         }
1917                 } else if (opcode == BPF_EXIT) {
1918                         return -ENOTSUPP;
1919                 }
1920         } else if (class == BPF_LD) {
1921                 if (!(*reg_mask & dreg))
1922                         return 0;
1923                 *reg_mask &= ~dreg;
1924                 /* It's ld_imm64 or ld_abs or ld_ind.
1925                  * For ld_imm64 no further tracking of precision
1926                  * into parent is necessary
1927                  */
1928                 if (mode == BPF_IND || mode == BPF_ABS)
1929                         /* to be analyzed */
1930                         return -ENOTSUPP;
1931         }
1932         return 0;
1933 }
1934
1935 /* the scalar precision tracking algorithm:
1936  * . at the start all registers have precise=false.
1937  * . scalar ranges are tracked as normal through alu and jmp insns.
1938  * . once precise value of the scalar register is used in:
1939  *   .  ptr + scalar alu
1940  *   . if (scalar cond K|scalar)
1941  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
1942  *   backtrack through the verifier states and mark all registers and
1943  *   stack slots with spilled constants that these scalar regisers
1944  *   should be precise.
1945  * . during state pruning two registers (or spilled stack slots)
1946  *   are equivalent if both are not precise.
1947  *
1948  * Note the verifier cannot simply walk register parentage chain,
1949  * since many different registers and stack slots could have been
1950  * used to compute single precise scalar.
1951  *
1952  * The approach of starting with precise=true for all registers and then
1953  * backtrack to mark a register as not precise when the verifier detects
1954  * that program doesn't care about specific value (e.g., when helper
1955  * takes register as ARG_ANYTHING parameter) is not safe.
1956  *
1957  * It's ok to walk single parentage chain of the verifier states.
1958  * It's possible that this backtracking will go all the way till 1st insn.
1959  * All other branches will be explored for needing precision later.
1960  *
1961  * The backtracking needs to deal with cases like:
1962  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1963  * r9 -= r8
1964  * r5 = r9
1965  * if r5 > 0x79f goto pc+7
1966  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1967  * r5 += 1
1968  * ...
1969  * call bpf_perf_event_output#25
1970  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1971  *
1972  * and this case:
1973  * r6 = 1
1974  * call foo // uses callee's r6 inside to compute r0
1975  * r0 += r6
1976  * if r0 == 0 goto
1977  *
1978  * to track above reg_mask/stack_mask needs to be independent for each frame.
1979  *
1980  * Also if parent's curframe > frame where backtracking started,
1981  * the verifier need to mark registers in both frames, otherwise callees
1982  * may incorrectly prune callers. This is similar to
1983  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1984  *
1985  * For now backtracking falls back into conservative marking.
1986  */
1987 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1988                                      struct bpf_verifier_state *st)
1989 {
1990         struct bpf_func_state *func;
1991         struct bpf_reg_state *reg;
1992         int i, j;
1993
1994         /* big hammer: mark all scalars precise in this path.
1995          * pop_stack may still get !precise scalars.
1996          */
1997         for (; st; st = st->parent)
1998                 for (i = 0; i <= st->curframe; i++) {
1999                         func = st->frame[i];
2000                         for (j = 0; j < BPF_REG_FP; j++) {
2001                                 reg = &func->regs[j];
2002                                 if (reg->type != SCALAR_VALUE)
2003                                         continue;
2004                                 reg->precise = true;
2005                         }
2006                         for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2007                                 if (func->stack[j].slot_type[0] != STACK_SPILL)
2008                                         continue;
2009                                 reg = &func->stack[j].spilled_ptr;
2010                                 if (reg->type != SCALAR_VALUE)
2011                                         continue;
2012                                 reg->precise = true;
2013                         }
2014                 }
2015 }
2016
2017 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2018                                   int spi)
2019 {
2020         struct bpf_verifier_state *st = env->cur_state;
2021         int first_idx = st->first_insn_idx;
2022         int last_idx = env->insn_idx;
2023         struct bpf_func_state *func;
2024         struct bpf_reg_state *reg;
2025         u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2026         u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2027         bool skip_first = true;
2028         bool new_marks = false;
2029         int i, err;
2030
2031         if (!env->bpf_capable)
2032                 return 0;
2033
2034         func = st->frame[st->curframe];
2035         if (regno >= 0) {
2036                 reg = &func->regs[regno];
2037                 if (reg->type != SCALAR_VALUE) {
2038                         WARN_ONCE(1, "backtracing misuse");
2039                         return -EFAULT;
2040                 }
2041                 if (!reg->precise)
2042                         new_marks = true;
2043                 else
2044                         reg_mask = 0;
2045                 reg->precise = true;
2046         }
2047
2048         while (spi >= 0) {
2049                 if (func->stack[spi].slot_type[0] != STACK_SPILL) {
2050                         stack_mask = 0;
2051                         break;
2052                 }
2053                 reg = &func->stack[spi].spilled_ptr;
2054                 if (reg->type != SCALAR_VALUE) {
2055                         stack_mask = 0;
2056                         break;
2057                 }
2058                 if (!reg->precise)
2059                         new_marks = true;
2060                 else
2061                         stack_mask = 0;
2062                 reg->precise = true;
2063                 break;
2064         }
2065
2066         if (!new_marks)
2067                 return 0;
2068         if (!reg_mask && !stack_mask)
2069                 return 0;
2070         for (;;) {
2071                 DECLARE_BITMAP(mask, 64);
2072                 u32 history = st->jmp_history_cnt;
2073
2074                 if (env->log.level & BPF_LOG_LEVEL)
2075                         verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2076                 for (i = last_idx;;) {
2077                         if (skip_first) {
2078                                 err = 0;
2079                                 skip_first = false;
2080                         } else {
2081                                 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2082                         }
2083                         if (err == -ENOTSUPP) {
2084                                 mark_all_scalars_precise(env, st);
2085                                 return 0;
2086                         } else if (err) {
2087                                 return err;
2088                         }
2089                         if (!reg_mask && !stack_mask)
2090                                 /* Found assignment(s) into tracked register in this state.
2091                                  * Since this state is already marked, just return.
2092                                  * Nothing to be tracked further in the parent state.
2093                                  */
2094                                 return 0;
2095                         if (i == first_idx)
2096                                 break;
2097                         i = get_prev_insn_idx(st, i, &history);
2098                         if (i >= env->prog->len) {
2099                                 /* This can happen if backtracking reached insn 0
2100                                  * and there are still reg_mask or stack_mask
2101                                  * to backtrack.
2102                                  * It means the backtracking missed the spot where
2103                                  * particular register was initialized with a constant.
2104                                  */
2105                                 verbose(env, "BUG backtracking idx %d\n", i);
2106                                 WARN_ONCE(1, "verifier backtracking bug");
2107                                 return -EFAULT;
2108                         }
2109                 }
2110                 st = st->parent;
2111                 if (!st)
2112                         break;
2113
2114                 new_marks = false;
2115                 func = st->frame[st->curframe];
2116                 bitmap_from_u64(mask, reg_mask);
2117                 for_each_set_bit(i, mask, 32) {
2118                         reg = &func->regs[i];
2119                         if (reg->type != SCALAR_VALUE) {
2120                                 reg_mask &= ~(1u << i);
2121                                 continue;
2122                         }
2123                         if (!reg->precise)
2124                                 new_marks = true;
2125                         reg->precise = true;
2126                 }
2127
2128                 bitmap_from_u64(mask, stack_mask);
2129                 for_each_set_bit(i, mask, 64) {
2130                         if (i >= func->allocated_stack / BPF_REG_SIZE) {
2131                                 /* the sequence of instructions:
2132                                  * 2: (bf) r3 = r10
2133                                  * 3: (7b) *(u64 *)(r3 -8) = r0
2134                                  * 4: (79) r4 = *(u64 *)(r10 -8)
2135                                  * doesn't contain jmps. It's backtracked
2136                                  * as a single block.
2137                                  * During backtracking insn 3 is not recognized as
2138                                  * stack access, so at the end of backtracking
2139                                  * stack slot fp-8 is still marked in stack_mask.
2140                                  * However the parent state may not have accessed
2141                                  * fp-8 and it's "unallocated" stack space.
2142                                  * In such case fallback to conservative.
2143                                  */
2144                                 mark_all_scalars_precise(env, st);
2145                                 return 0;
2146                         }
2147
2148                         if (func->stack[i].slot_type[0] != STACK_SPILL) {
2149                                 stack_mask &= ~(1ull << i);
2150                                 continue;
2151                         }
2152                         reg = &func->stack[i].spilled_ptr;
2153                         if (reg->type != SCALAR_VALUE) {
2154                                 stack_mask &= ~(1ull << i);
2155                                 continue;
2156                         }
2157                         if (!reg->precise)
2158                                 new_marks = true;
2159                         reg->precise = true;
2160                 }
2161                 if (env->log.level & BPF_LOG_LEVEL) {
2162                         print_verifier_state(env, func);
2163                         verbose(env, "parent %s regs=%x stack=%llx marks\n",
2164                                 new_marks ? "didn't have" : "already had",
2165                                 reg_mask, stack_mask);
2166                 }
2167
2168                 if (!reg_mask && !stack_mask)
2169                         break;
2170                 if (!new_marks)
2171                         break;
2172
2173                 last_idx = st->last_insn_idx;
2174                 first_idx = st->first_insn_idx;
2175         }
2176         return 0;
2177 }
2178
2179 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2180 {
2181         return __mark_chain_precision(env, regno, -1);
2182 }
2183
2184 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2185 {
2186         return __mark_chain_precision(env, -1, spi);
2187 }
2188
2189 static bool is_spillable_regtype(enum bpf_reg_type type)
2190 {
2191         switch (type) {
2192         case PTR_TO_MAP_VALUE:
2193         case PTR_TO_MAP_VALUE_OR_NULL:
2194         case PTR_TO_STACK:
2195         case PTR_TO_CTX:
2196         case PTR_TO_PACKET:
2197         case PTR_TO_PACKET_META:
2198         case PTR_TO_PACKET_END:
2199         case PTR_TO_FLOW_KEYS:
2200         case CONST_PTR_TO_MAP:
2201         case PTR_TO_SOCKET:
2202         case PTR_TO_SOCKET_OR_NULL:
2203         case PTR_TO_SOCK_COMMON:
2204         case PTR_TO_SOCK_COMMON_OR_NULL:
2205         case PTR_TO_TCP_SOCK:
2206         case PTR_TO_TCP_SOCK_OR_NULL:
2207         case PTR_TO_XDP_SOCK:
2208         case PTR_TO_BTF_ID:
2209         case PTR_TO_BTF_ID_OR_NULL:
2210         case PTR_TO_RDONLY_BUF:
2211         case PTR_TO_RDONLY_BUF_OR_NULL:
2212         case PTR_TO_RDWR_BUF:
2213         case PTR_TO_RDWR_BUF_OR_NULL:
2214         case PTR_TO_PERCPU_BTF_ID:
2215         case PTR_TO_MEM:
2216         case PTR_TO_MEM_OR_NULL:
2217                 return true;
2218         default:
2219                 return false;
2220         }
2221 }
2222
2223 /* Does this register contain a constant zero? */
2224 static bool register_is_null(struct bpf_reg_state *reg)
2225 {
2226         return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2227 }
2228
2229 static bool register_is_const(struct bpf_reg_state *reg)
2230 {
2231         return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2232 }
2233
2234 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2235 {
2236         return tnum_is_unknown(reg->var_off) &&
2237                reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2238                reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2239                reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2240                reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2241 }
2242
2243 static bool register_is_bounded(struct bpf_reg_state *reg)
2244 {
2245         return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2246 }
2247
2248 static bool __is_pointer_value(bool allow_ptr_leaks,
2249                                const struct bpf_reg_state *reg)
2250 {
2251         if (allow_ptr_leaks)
2252                 return false;
2253
2254         return reg->type != SCALAR_VALUE;
2255 }
2256
2257 static void save_register_state(struct bpf_func_state *state,
2258                                 int spi, struct bpf_reg_state *reg)
2259 {
2260         int i;
2261
2262         state->stack[spi].spilled_ptr = *reg;
2263         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2264
2265         for (i = 0; i < BPF_REG_SIZE; i++)
2266                 state->stack[spi].slot_type[i] = STACK_SPILL;
2267 }
2268
2269 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2270  * stack boundary and alignment are checked in check_mem_access()
2271  */
2272 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2273                                        /* stack frame we're writing to */
2274                                        struct bpf_func_state *state,
2275                                        int off, int size, int value_regno,
2276                                        int insn_idx)
2277 {
2278         struct bpf_func_state *cur; /* state of the current function */
2279         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2280         u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2281         struct bpf_reg_state *reg = NULL;
2282
2283         err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
2284                                  state->acquired_refs, true);
2285         if (err)
2286                 return err;
2287         /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2288          * so it's aligned access and [off, off + size) are within stack limits
2289          */
2290         if (!env->allow_ptr_leaks &&
2291             state->stack[spi].slot_type[0] == STACK_SPILL &&
2292             size != BPF_REG_SIZE) {
2293                 verbose(env, "attempt to corrupt spilled pointer on stack\n");
2294                 return -EACCES;
2295         }
2296
2297         cur = env->cur_state->frame[env->cur_state->curframe];
2298         if (value_regno >= 0)
2299                 reg = &cur->regs[value_regno];
2300         if (!env->bypass_spec_v4) {
2301                 bool sanitize = reg && is_spillable_regtype(reg->type);
2302
2303                 for (i = 0; i < size; i++) {
2304                         if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2305                                 sanitize = true;
2306                                 break;
2307                         }
2308                 }
2309
2310                 if (sanitize)
2311                         env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2312         }
2313
2314         if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
2315             !register_is_null(reg) && env->bpf_capable) {
2316                 if (dst_reg != BPF_REG_FP) {
2317                         /* The backtracking logic can only recognize explicit
2318                          * stack slot address like [fp - 8]. Other spill of
2319                          * scalar via different register has to be conervative.
2320                          * Backtrack from here and mark all registers as precise
2321                          * that contributed into 'reg' being a constant.
2322                          */
2323                         err = mark_chain_precision(env, value_regno);
2324                         if (err)
2325                                 return err;
2326                 }
2327                 save_register_state(state, spi, reg);
2328         } else if (reg && is_spillable_regtype(reg->type)) {
2329                 /* register containing pointer is being spilled into stack */
2330                 if (size != BPF_REG_SIZE) {
2331                         verbose_linfo(env, insn_idx, "; ");
2332                         verbose(env, "invalid size of register spill\n");
2333                         return -EACCES;
2334                 }
2335                 if (state != cur && reg->type == PTR_TO_STACK) {
2336                         verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2337                         return -EINVAL;
2338                 }
2339                 save_register_state(state, spi, reg);
2340         } else {
2341                 u8 type = STACK_MISC;
2342
2343                 /* regular write of data into stack destroys any spilled ptr */
2344                 state->stack[spi].spilled_ptr.type = NOT_INIT;
2345                 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2346                 if (state->stack[spi].slot_type[0] == STACK_SPILL)
2347                         for (i = 0; i < BPF_REG_SIZE; i++)
2348                                 state->stack[spi].slot_type[i] = STACK_MISC;
2349
2350                 /* only mark the slot as written if all 8 bytes were written
2351                  * otherwise read propagation may incorrectly stop too soon
2352                  * when stack slots are partially written.
2353                  * This heuristic means that read propagation will be
2354                  * conservative, since it will add reg_live_read marks
2355                  * to stack slots all the way to first state when programs
2356                  * writes+reads less than 8 bytes
2357                  */
2358                 if (size == BPF_REG_SIZE)
2359                         state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2360
2361                 /* when we zero initialize stack slots mark them as such */
2362                 if (reg && register_is_null(reg)) {
2363                         /* backtracking doesn't work for STACK_ZERO yet. */
2364                         err = mark_chain_precision(env, value_regno);
2365                         if (err)
2366                                 return err;
2367                         type = STACK_ZERO;
2368                 }
2369
2370                 /* Mark slots affected by this stack write. */
2371                 for (i = 0; i < size; i++)
2372                         state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2373                                 type;
2374         }
2375         return 0;
2376 }
2377
2378 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2379  * known to contain a variable offset.
2380  * This function checks whether the write is permitted and conservatively
2381  * tracks the effects of the write, considering that each stack slot in the
2382  * dynamic range is potentially written to.
2383  *
2384  * 'off' includes 'regno->off'.
2385  * 'value_regno' can be -1, meaning that an unknown value is being written to
2386  * the stack.
2387  *
2388  * Spilled pointers in range are not marked as written because we don't know
2389  * what's going to be actually written. This means that read propagation for
2390  * future reads cannot be terminated by this write.
2391  *
2392  * For privileged programs, uninitialized stack slots are considered
2393  * initialized by this write (even though we don't know exactly what offsets
2394  * are going to be written to). The idea is that we don't want the verifier to
2395  * reject future reads that access slots written to through variable offsets.
2396  */
2397 static int check_stack_write_var_off(struct bpf_verifier_env *env,
2398                                      /* func where register points to */
2399                                      struct bpf_func_state *state,
2400                                      int ptr_regno, int off, int size,
2401                                      int value_regno, int insn_idx)
2402 {
2403         struct bpf_func_state *cur; /* state of the current function */
2404         int min_off, max_off;
2405         int i, err;
2406         struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2407         bool writing_zero = false;
2408         /* set if the fact that we're writing a zero is used to let any
2409          * stack slots remain STACK_ZERO
2410          */
2411         bool zero_used = false;
2412
2413         cur = env->cur_state->frame[env->cur_state->curframe];
2414         ptr_reg = &cur->regs[ptr_regno];
2415         min_off = ptr_reg->smin_value + off;
2416         max_off = ptr_reg->smax_value + off + size;
2417         if (value_regno >= 0)
2418                 value_reg = &cur->regs[value_regno];
2419         if (value_reg && register_is_null(value_reg))
2420                 writing_zero = true;
2421
2422         err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE),
2423                                  state->acquired_refs, true);
2424         if (err)
2425                 return err;
2426
2427
2428         /* Variable offset writes destroy any spilled pointers in range. */
2429         for (i = min_off; i < max_off; i++) {
2430                 u8 new_type, *stype;
2431                 int slot, spi;
2432
2433                 slot = -i - 1;
2434                 spi = slot / BPF_REG_SIZE;
2435                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2436
2437                 if (!env->allow_ptr_leaks
2438                                 && *stype != NOT_INIT
2439                                 && *stype != SCALAR_VALUE) {
2440                         /* Reject the write if there's are spilled pointers in
2441                          * range. If we didn't reject here, the ptr status
2442                          * would be erased below (even though not all slots are
2443                          * actually overwritten), possibly opening the door to
2444                          * leaks.
2445                          */
2446                         verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
2447                                 insn_idx, i);
2448                         return -EINVAL;
2449                 }
2450
2451                 /* Erase all spilled pointers. */
2452                 state->stack[spi].spilled_ptr.type = NOT_INIT;
2453
2454                 /* Update the slot type. */
2455                 new_type = STACK_MISC;
2456                 if (writing_zero && *stype == STACK_ZERO) {
2457                         new_type = STACK_ZERO;
2458                         zero_used = true;
2459                 }
2460                 /* If the slot is STACK_INVALID, we check whether it's OK to
2461                  * pretend that it will be initialized by this write. The slot
2462                  * might not actually be written to, and so if we mark it as
2463                  * initialized future reads might leak uninitialized memory.
2464                  * For privileged programs, we will accept such reads to slots
2465                  * that may or may not be written because, if we're reject
2466                  * them, the error would be too confusing.
2467                  */
2468                 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2469                         verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
2470                                         insn_idx, i);
2471                         return -EINVAL;
2472                 }
2473                 *stype = new_type;
2474         }
2475         if (zero_used) {
2476                 /* backtracking doesn't work for STACK_ZERO yet. */
2477                 err = mark_chain_precision(env, value_regno);
2478                 if (err)
2479                         return err;
2480         }
2481         return 0;
2482 }
2483
2484 /* When register 'dst_regno' is assigned some values from stack[min_off,
2485  * max_off), we set the register's type according to the types of the
2486  * respective stack slots. If all the stack values are known to be zeros, then
2487  * so is the destination reg. Otherwise, the register is considered to be
2488  * SCALAR. This function does not deal with register filling; the caller must
2489  * ensure that all spilled registers in the stack range have been marked as
2490  * read.
2491  */
2492 static void mark_reg_stack_read(struct bpf_verifier_env *env,
2493                                 /* func where src register points to */
2494                                 struct bpf_func_state *ptr_state,
2495                                 int min_off, int max_off, int dst_regno)
2496 {
2497         struct bpf_verifier_state *vstate = env->cur_state;
2498         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2499         int i, slot, spi;
2500         u8 *stype;
2501         int zeros = 0;
2502
2503         for (i = min_off; i < max_off; i++) {
2504                 slot = -i - 1;
2505                 spi = slot / BPF_REG_SIZE;
2506                 stype = ptr_state->stack[spi].slot_type;
2507                 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
2508                         break;
2509                 zeros++;
2510         }
2511         if (zeros == max_off - min_off) {
2512                 /* any access_size read into register is zero extended,
2513                  * so the whole register == const_zero
2514                  */
2515                 __mark_reg_const_zero(&state->regs[dst_regno]);
2516                 /* backtracking doesn't support STACK_ZERO yet,
2517                  * so mark it precise here, so that later
2518                  * backtracking can stop here.
2519                  * Backtracking may not need this if this register
2520                  * doesn't participate in pointer adjustment.
2521                  * Forward propagation of precise flag is not
2522                  * necessary either. This mark is only to stop
2523                  * backtracking. Any register that contributed
2524                  * to const 0 was marked precise before spill.
2525                  */
2526                 state->regs[dst_regno].precise = true;
2527         } else {
2528                 /* have read misc data from the stack */
2529                 mark_reg_unknown(env, state->regs, dst_regno);
2530         }
2531         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2532 }
2533
2534 /* Read the stack at 'off' and put the results into the register indicated by
2535  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
2536  * spilled reg.
2537  *
2538  * 'dst_regno' can be -1, meaning that the read value is not going to a
2539  * register.
2540  *
2541  * The access is assumed to be within the current stack bounds.
2542  */
2543 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
2544                                       /* func where src register points to */
2545                                       struct bpf_func_state *reg_state,
2546                                       int off, int size, int dst_regno)
2547 {
2548         struct bpf_verifier_state *vstate = env->cur_state;
2549         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2550         int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
2551         struct bpf_reg_state *reg;
2552         u8 *stype;
2553
2554         stype = reg_state->stack[spi].slot_type;
2555         reg = &reg_state->stack[spi].spilled_ptr;
2556
2557         if (stype[0] == STACK_SPILL) {
2558                 if (size != BPF_REG_SIZE) {
2559                         if (reg->type != SCALAR_VALUE) {
2560                                 verbose_linfo(env, env->insn_idx, "; ");
2561                                 verbose(env, "invalid size of register fill\n");
2562                                 return -EACCES;
2563                         }
2564                         if (dst_regno >= 0) {
2565                                 mark_reg_unknown(env, state->regs, dst_regno);
2566                                 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2567                         }
2568                         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2569                         return 0;
2570                 }
2571                 for (i = 1; i < BPF_REG_SIZE; i++) {
2572                         if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
2573                                 verbose(env, "corrupted spill memory\n");
2574                                 return -EACCES;
2575                         }
2576                 }
2577
2578                 if (dst_regno >= 0) {
2579                         /* restore register state from stack */
2580                         state->regs[dst_regno] = *reg;
2581                         /* mark reg as written since spilled pointer state likely
2582                          * has its liveness marks cleared by is_state_visited()
2583                          * which resets stack/reg liveness for state transitions
2584                          */
2585                         state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2586                 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2587                         /* If dst_regno==-1, the caller is asking us whether
2588                          * it is acceptable to use this value as a SCALAR_VALUE
2589                          * (e.g. for XADD).
2590                          * We must not allow unprivileged callers to do that
2591                          * with spilled pointers.
2592                          */
2593                         verbose(env, "leaking pointer from stack off %d\n",
2594                                 off);
2595                         return -EACCES;
2596                 }
2597                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2598         } else {
2599                 u8 type;
2600
2601                 for (i = 0; i < size; i++) {
2602                         type = stype[(slot - i) % BPF_REG_SIZE];
2603                         if (type == STACK_MISC)
2604                                 continue;
2605                         if (type == STACK_ZERO)
2606                                 continue;
2607                         verbose(env, "invalid read from stack off %d+%d size %d\n",
2608                                 off, i, size);
2609                         return -EACCES;
2610                 }
2611                 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2612                 if (dst_regno >= 0)
2613                         mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
2614         }
2615         return 0;
2616 }
2617
2618 enum stack_access_src {
2619         ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
2620         ACCESS_HELPER = 2,  /* the access is performed by a helper */
2621 };
2622
2623 static int check_stack_range_initialized(struct bpf_verifier_env *env,
2624                                          int regno, int off, int access_size,
2625                                          bool zero_size_allowed,
2626                                          enum stack_access_src type,
2627                                          struct bpf_call_arg_meta *meta);
2628
2629 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2630 {
2631         return cur_regs(env) + regno;
2632 }
2633
2634 /* Read the stack at 'ptr_regno + off' and put the result into the register
2635  * 'dst_regno'.
2636  * 'off' includes the pointer register's fixed offset(i.e. 'ptr_regno.off'),
2637  * but not its variable offset.
2638  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
2639  *
2640  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
2641  * filling registers (i.e. reads of spilled register cannot be detected when
2642  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
2643  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
2644  * offset; for a fixed offset check_stack_read_fixed_off should be used
2645  * instead.
2646  */
2647 static int check_stack_read_var_off(struct bpf_verifier_env *env,
2648                                     int ptr_regno, int off, int size, int dst_regno)
2649 {
2650         /* The state of the source register. */
2651         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2652         struct bpf_func_state *ptr_state = func(env, reg);
2653         int err;
2654         int min_off, max_off;
2655
2656         /* Note that we pass a NULL meta, so raw access will not be permitted.
2657          */
2658         err = check_stack_range_initialized(env, ptr_regno, off, size,
2659                                             false, ACCESS_DIRECT, NULL);
2660         if (err)
2661                 return err;
2662
2663         min_off = reg->smin_value + off;
2664         max_off = reg->smax_value + off;
2665         mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
2666         return 0;
2667 }
2668
2669 /* check_stack_read dispatches to check_stack_read_fixed_off or
2670  * check_stack_read_var_off.
2671  *
2672  * The caller must ensure that the offset falls within the allocated stack
2673  * bounds.
2674  *
2675  * 'dst_regno' is a register which will receive the value from the stack. It
2676  * can be -1, meaning that the read value is not going to a register.
2677  */
2678 static int check_stack_read(struct bpf_verifier_env *env,
2679                             int ptr_regno, int off, int size,
2680                             int dst_regno)
2681 {
2682         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2683         struct bpf_func_state *state = func(env, reg);
2684         int err;
2685         /* Some accesses are only permitted with a static offset. */
2686         bool var_off = !tnum_is_const(reg->var_off);
2687
2688         /* The offset is required to be static when reads don't go to a
2689          * register, in order to not leak pointers (see
2690          * check_stack_read_fixed_off).
2691          */
2692         if (dst_regno < 0 && var_off) {
2693                 char tn_buf[48];
2694
2695                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2696                 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
2697                         tn_buf, off, size);
2698                 return -EACCES;
2699         }
2700         /* Variable offset is prohibited for unprivileged mode for simplicity
2701          * since it requires corresponding support in Spectre masking for stack
2702          * ALU. See also retrieve_ptr_limit().
2703          */
2704         if (!env->bypass_spec_v1 && var_off) {
2705                 char tn_buf[48];
2706
2707                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2708                 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
2709                                 ptr_regno, tn_buf);
2710                 return -EACCES;
2711         }
2712
2713         if (!var_off) {
2714                 off += reg->var_off.value;
2715                 err = check_stack_read_fixed_off(env, state, off, size,
2716                                                  dst_regno);
2717         } else {
2718                 /* Variable offset stack reads need more conservative handling
2719                  * than fixed offset ones. Note that dst_regno >= 0 on this
2720                  * branch.
2721                  */
2722                 err = check_stack_read_var_off(env, ptr_regno, off, size,
2723                                                dst_regno);
2724         }
2725         return err;
2726 }
2727
2728
2729 /* check_stack_write dispatches to check_stack_write_fixed_off or
2730  * check_stack_write_var_off.
2731  *
2732  * 'ptr_regno' is the register used as a pointer into the stack.
2733  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
2734  * 'value_regno' is the register whose value we're writing to the stack. It can
2735  * be -1, meaning that we're not writing from a register.
2736  *
2737  * The caller must ensure that the offset falls within the maximum stack size.
2738  */
2739 static int check_stack_write(struct bpf_verifier_env *env,
2740                              int ptr_regno, int off, int size,
2741                              int value_regno, int insn_idx)
2742 {
2743         struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2744         struct bpf_func_state *state = func(env, reg);
2745         int err;
2746
2747         if (tnum_is_const(reg->var_off)) {
2748                 off += reg->var_off.value;
2749                 err = check_stack_write_fixed_off(env, state, off, size,
2750                                                   value_regno, insn_idx);
2751         } else {
2752                 /* Variable offset stack reads need more conservative handling
2753                  * than fixed offset ones.
2754                  */
2755                 err = check_stack_write_var_off(env, state,
2756                                                 ptr_regno, off, size,
2757                                                 value_regno, insn_idx);
2758         }
2759         return err;
2760 }
2761
2762 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
2763                                  int off, int size, enum bpf_access_type type)
2764 {
2765         struct bpf_reg_state *regs = cur_regs(env);
2766         struct bpf_map *map = regs[regno].map_ptr;
2767         u32 cap = bpf_map_flags_to_cap(map);
2768
2769         if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2770                 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
2771                         map->value_size, off, size);
2772                 return -EACCES;
2773         }
2774
2775         if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2776                 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
2777                         map->value_size, off, size);
2778                 return -EACCES;
2779         }
2780
2781         return 0;
2782 }
2783
2784 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
2785 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
2786                               int off, int size, u32 mem_size,
2787                               bool zero_size_allowed)
2788 {
2789         bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
2790         struct bpf_reg_state *reg;
2791
2792         if (off >= 0 && size_ok && (u64)off + size <= mem_size)
2793                 return 0;
2794
2795         reg = &cur_regs(env)[regno];
2796         switch (reg->type) {
2797         case PTR_TO_MAP_VALUE:
2798                 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
2799                         mem_size, off, size);
2800                 break;
2801         case PTR_TO_PACKET:
2802         case PTR_TO_PACKET_META:
2803         case PTR_TO_PACKET_END:
2804                 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
2805                         off, size, regno, reg->id, off, mem_size);
2806                 break;
2807         case PTR_TO_MEM:
2808         default:
2809                 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
2810                         mem_size, off, size);
2811         }
2812
2813         return -EACCES;
2814 }
2815
2816 /* check read/write into a memory region with possible variable offset */
2817 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
2818                                    int off, int size, u32 mem_size,
2819                                    bool zero_size_allowed)
2820 {
2821         struct bpf_verifier_state *vstate = env->cur_state;
2822         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2823         struct bpf_reg_state *reg = &state->regs[regno];
2824         int err;
2825
2826         /* We may have adjusted the register pointing to memory region, so we
2827          * need to try adding each of min_value and max_value to off
2828          * to make sure our theoretical access will be safe.
2829          */
2830         if (env->log.level & BPF_LOG_LEVEL)
2831                 print_verifier_state(env, state);
2832
2833         /* The minimum value is only important with signed
2834          * comparisons where we can't assume the floor of a
2835          * value is 0.  If we are using signed variables for our
2836          * index'es we need to make sure that whatever we use
2837          * will have a set floor within our range.
2838          */
2839         if (reg->smin_value < 0 &&
2840             (reg->smin_value == S64_MIN ||
2841              (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
2842               reg->smin_value + off < 0)) {
2843                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2844                         regno);
2845                 return -EACCES;
2846         }
2847         err = __check_mem_access(env, regno, reg->smin_value + off, size,
2848                                  mem_size, zero_size_allowed);
2849         if (err) {
2850                 verbose(env, "R%d min value is outside of the allowed memory range\n",
2851                         regno);
2852                 return err;
2853         }
2854
2855         /* If we haven't set a max value then we need to bail since we can't be
2856          * sure we won't do bad things.
2857          * If reg->umax_value + off could overflow, treat that as unbounded too.
2858          */
2859         if (reg->umax_value >= BPF_MAX_VAR_OFF) {
2860                 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
2861                         regno);
2862                 return -EACCES;
2863         }
2864         err = __check_mem_access(env, regno, reg->umax_value + off, size,
2865                                  mem_size, zero_size_allowed);
2866         if (err) {
2867                 verbose(env, "R%d max value is outside of the allowed memory range\n",
2868                         regno);
2869                 return err;
2870         }
2871
2872         return 0;
2873 }
2874
2875 /* check read/write into a map element with possible variable offset */
2876 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
2877                             int off, int size, bool zero_size_allowed)
2878 {
2879         struct bpf_verifier_state *vstate = env->cur_state;
2880         struct bpf_func_state *state = vstate->frame[vstate->curframe];
2881         struct bpf_reg_state *reg = &state->regs[regno];
2882         struct bpf_map *map = reg->map_ptr;
2883         int err;
2884
2885         err = check_mem_region_access(env, regno, off, size, map->value_size,
2886                                       zero_size_allowed);
2887         if (err)
2888                 return err;
2889
2890         if (map_value_has_spin_lock(map)) {
2891                 u32 lock = map->spin_lock_off;
2892
2893                 /* if any part of struct bpf_spin_lock can be touched by
2894                  * load/store reject this program.
2895                  * To check that [x1, x2) overlaps with [y1, y2)
2896                  * it is sufficient to check x1 < y2 && y1 < x2.
2897                  */
2898                 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
2899                      lock < reg->umax_value + off + size) {
2900                         verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2901                         return -EACCES;
2902                 }
2903         }
2904         return err;
2905 }
2906
2907 #define MAX_PACKET_OFF 0xffff
2908
2909 static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
2910 {
2911         return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
2912 }
2913
2914 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
2915                                        const struct bpf_call_arg_meta *meta,
2916                                        enum bpf_access_type t)
2917 {
2918         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
2919
2920         switch (prog_type) {
2921         /* Program types only with direct read access go here! */
2922         case BPF_PROG_TYPE_LWT_IN:
2923         case BPF_PROG_TYPE_LWT_OUT:
2924         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2925         case BPF_PROG_TYPE_SK_REUSEPORT:
2926         case BPF_PROG_TYPE_FLOW_DISSECTOR:
2927         case BPF_PROG_TYPE_CGROUP_SKB:
2928                 if (t == BPF_WRITE)
2929                         return false;
2930                 fallthrough;
2931
2932         /* Program types with direct read + write access go here! */
2933         case BPF_PROG_TYPE_SCHED_CLS:
2934         case BPF_PROG_TYPE_SCHED_ACT:
2935         case BPF_PROG_TYPE_XDP:
2936         case BPF_PROG_TYPE_LWT_XMIT:
2937         case BPF_PROG_TYPE_SK_SKB:
2938         case BPF_PROG_TYPE_SK_MSG:
2939                 if (meta)
2940                         return meta->pkt_access;
2941
2942                 env->seen_direct_write = true;
2943                 return true;
2944
2945         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2946                 if (t == BPF_WRITE)
2947                         env->seen_direct_write = true;
2948
2949                 return true;
2950
2951         default:
2952                 return false;
2953         }
2954 }
2955
2956 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
2957                                int size, bool zero_size_allowed)
2958 {
2959         struct bpf_reg_state *regs = cur_regs(env);
2960         struct bpf_reg_state *reg = &regs[regno];
2961         int err;
2962
2963         /* We may have added a variable offset to the packet pointer; but any
2964          * reg->range we have comes after that.  We are only checking the fixed
2965          * offset.
2966          */
2967
2968         /* We don't allow negative numbers, because we aren't tracking enough
2969          * detail to prove they're safe.
2970          */
2971         if (reg->smin_value < 0) {
2972                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2973                         regno);
2974                 return -EACCES;
2975         }
2976         err = __check_mem_access(env, regno, off, size, reg->range,
2977                                  zero_size_allowed);
2978         if (err) {
2979                 verbose(env, "R%d offset is outside of the packet\n", regno);
2980                 return err;
2981         }
2982
2983         /* __check_mem_access has made sure "off + size - 1" is within u16.
2984          * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
2985          * otherwise find_good_pkt_pointers would have refused to set range info
2986          * that __check_mem_access would have rejected this pkt access.
2987          * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2988          */
2989         env->prog->aux->max_pkt_offset =
2990                 max_t(u32, env->prog->aux->max_pkt_offset,
2991                       off + reg->umax_value + size - 1);
2992
2993         return err;
2994 }
2995
2996 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
2997 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
2998                             enum bpf_access_type t, enum bpf_reg_type *reg_type,
2999                             u32 *btf_id)
3000 {
3001         struct bpf_insn_access_aux info = {
3002                 .reg_type = *reg_type,
3003                 .log = &env->log,
3004         };
3005
3006         if (env->ops->is_valid_access &&
3007             env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3008                 /* A non zero info.ctx_field_size indicates that this field is a
3009                  * candidate for later verifier transformation to load the whole
3010                  * field and then apply a mask when accessed with a narrower
3011                  * access than actual ctx access size. A zero info.ctx_field_size
3012                  * will only allow for whole field access and rejects any other
3013                  * type of narrower access.
3014                  */
3015                 *reg_type = info.reg_type;
3016
3017                 if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
3018                         *btf_id = info.btf_id;
3019                 else
3020                         env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3021                 /* remember the offset of last byte accessed in ctx */
3022                 if (env->prog->aux->max_ctx_offset < off + size)
3023                         env->prog->aux->max_ctx_offset = off + size;
3024                 return 0;
3025         }
3026
3027         verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3028         return -EACCES;
3029 }
3030
3031 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3032                                   int size)
3033 {
3034         if (size < 0 || off < 0 ||
3035             (u64)off + size > sizeof(struct bpf_flow_keys)) {
3036                 verbose(env, "invalid access to flow keys off=%d size=%d\n",
3037                         off, size);
3038                 return -EACCES;
3039         }
3040         return 0;
3041 }
3042
3043 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3044                              u32 regno, int off, int size,
3045                              enum bpf_access_type t)
3046 {
3047         struct bpf_reg_state *regs = cur_regs(env);
3048         struct bpf_reg_state *reg = &regs[regno];
3049         struct bpf_insn_access_aux info = {};
3050         bool valid;
3051
3052         if (reg->smin_value < 0) {
3053                 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3054                         regno);
3055                 return -EACCES;
3056         }
3057
3058         switch (reg->type) {
3059         case PTR_TO_SOCK_COMMON:
3060                 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3061                 break;
3062         case PTR_TO_SOCKET:
3063                 valid = bpf_sock_is_valid_access(off, size, t, &info);
3064                 break;
3065         case PTR_TO_TCP_SOCK:
3066                 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3067                 break;
3068         case PTR_TO_XDP_SOCK:
3069                 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3070                 break;
3071         default:
3072                 valid = false;
3073         }
3074
3075
3076         if (valid) {
3077                 env->insn_aux_data[insn_idx].ctx_field_size =
3078                         info.ctx_field_size;
3079                 return 0;
3080         }
3081
3082         verbose(env, "R%d invalid %s access off=%d size=%d\n",
3083                 regno, reg_type_str[reg->type], off, size);
3084
3085         return -EACCES;
3086 }
3087
3088 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3089 {
3090         return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3091 }
3092
3093 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3094 {
3095         const struct bpf_reg_state *reg = reg_state(env, regno);
3096
3097         return reg->type == PTR_TO_CTX;
3098 }
3099
3100 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3101 {
3102         const struct bpf_reg_state *reg = reg_state(env, regno);
3103
3104         return type_is_sk_pointer(reg->type);
3105 }
3106
3107 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3108 {
3109         const struct bpf_reg_state *reg = reg_state(env, regno);
3110
3111         return type_is_pkt_pointer(reg->type);
3112 }
3113
3114 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3115 {
3116         const struct bpf_reg_state *reg = reg_state(env, regno);
3117
3118         /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3119         return reg->type == PTR_TO_FLOW_KEYS;
3120 }
3121
3122 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
3123                                    const struct bpf_reg_state *reg,
3124                                    int off, int size, bool strict)
3125 {
3126         struct tnum reg_off;
3127         int ip_align;
3128
3129         /* Byte size accesses are always allowed. */
3130         if (!strict || size == 1)
3131                 return 0;
3132
3133         /* For platforms that do not have a Kconfig enabling
3134          * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3135          * NET_IP_ALIGN is universally set to '2'.  And on platforms
3136          * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3137          * to this code only in strict mode where we want to emulate
3138          * the NET_IP_ALIGN==2 checking.  Therefore use an
3139          * unconditional IP align value of '2'.
3140          */
3141         ip_align = 2;
3142
3143         reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3144         if (!tnum_is_aligned(reg_off, size)) {
3145                 char tn_buf[48];
3146
3147                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3148                 verbose(env,
3149                         "misaligned packet access off %d+%s+%d+%d size %d\n",
3150                         ip_align, tn_buf, reg->off, off, size);
3151                 return -EACCES;
3152         }
3153
3154         return 0;
3155 }
3156
3157 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
3158                                        const struct bpf_reg_state *reg,
3159                                        const char *pointer_desc,
3160                                        int off, int size, bool strict)
3161 {
3162         struct tnum reg_off;
3163
3164         /* Byte size accesses are always allowed. */
3165         if (!strict || size == 1)
3166                 return 0;
3167
3168         reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3169         if (!tnum_is_aligned(reg_off, size)) {
3170                 char tn_buf[48];
3171
3172                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3173                 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
3174                         pointer_desc, tn_buf, reg->off, off, size);
3175                 return -EACCES;
3176         }
3177
3178         return 0;
3179 }
3180
3181 static int check_ptr_alignment(struct bpf_verifier_env *env,
3182                                const struct bpf_reg_state *reg, int off,
3183                                int size, bool strict_alignment_once)
3184 {
3185         bool strict = env->strict_alignment || strict_alignment_once;
3186         const char *pointer_desc = "";
3187
3188         switch (reg->type) {
3189         case PTR_TO_PACKET:
3190         case PTR_TO_PACKET_META:
3191                 /* Special case, because of NET_IP_ALIGN. Given metadata sits
3192                  * right in front, treat it the very same way.
3193                  */
3194                 return check_pkt_ptr_alignment(env, reg, off, size, strict);
3195         case PTR_TO_FLOW_KEYS:
3196                 pointer_desc = "flow keys ";
3197                 break;
3198         case PTR_TO_MAP_VALUE:
3199                 pointer_desc = "value ";
3200                 break;
3201         case PTR_TO_CTX:
3202                 pointer_desc = "context ";
3203                 break;
3204         case PTR_TO_STACK:
3205                 pointer_desc = "stack ";
3206                 /* The stack spill tracking logic in check_stack_write_fixed_off()
3207                  * and check_stack_read_fixed_off() relies on stack accesses being
3208                  * aligned.
3209                  */
3210                 strict = true;
3211                 break;
3212         case PTR_TO_SOCKET:
3213                 pointer_desc = "sock ";
3214                 break;
3215         case PTR_TO_SOCK_COMMON:
3216                 pointer_desc = "sock_common ";
3217                 break;
3218         case PTR_TO_TCP_SOCK:
3219                 pointer_desc = "tcp_sock ";
3220                 break;
3221         case PTR_TO_XDP_SOCK:
3222                 pointer_desc = "xdp_sock ";
3223                 break;
3224         default:
3225                 break;
3226         }
3227         return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
3228                                            strict);
3229 }
3230
3231 static int update_stack_depth(struct bpf_verifier_env *env,
3232                               const struct bpf_func_state *func,
3233                               int off)
3234 {
3235         u16 stack = env->subprog_info[func->subprogno].stack_depth;
3236
3237         if (stack >= -off)
3238                 return 0;
3239
3240         /* update known max for given subprogram */
3241         env->subprog_info[func->subprogno].stack_depth = -off;
3242         return 0;
3243 }
3244
3245 /* starting from main bpf function walk all instructions of the function
3246  * and recursively walk all callees that given function can call.
3247  * Ignore jump and exit insns.
3248  * Since recursion is prevented by check_cfg() this algorithm
3249  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3250  */
3251 static int check_max_stack_depth(struct bpf_verifier_env *env)
3252 {
3253         int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3254         struct bpf_subprog_info *subprog = env->subprog_info;
3255         struct bpf_insn *insn = env->prog->insnsi;
3256         bool tail_call_reachable = false;
3257         int ret_insn[MAX_CALL_FRAMES];
3258         int ret_prog[MAX_CALL_FRAMES];
3259         int j;
3260
3261 process_func:
3262         /* protect against potential stack overflow that might happen when
3263          * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3264          * depth for such case down to 256 so that the worst case scenario
3265          * would result in 8k stack size (32 which is tailcall limit * 256 =
3266          * 8k).
3267          *
3268          * To get the idea what might happen, see an example:
3269          * func1 -> sub rsp, 128
3270          *  subfunc1 -> sub rsp, 256
3271          *  tailcall1 -> add rsp, 256
3272          *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3273          *   subfunc2 -> sub rsp, 64
3274          *   subfunc22 -> sub rsp, 128
3275          *   tailcall2 -> add rsp, 128
3276          *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3277          *
3278          * tailcall will unwind the current stack frame but it will not get rid
3279          * of caller's stack as shown on the example above.
3280          */
3281         if (idx && subprog[idx].has_tail_call && depth >= 256) {
3282                 verbose(env,
3283                         "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3284                         depth);
3285                 return -EACCES;
3286         }
3287         /* round up to 32-bytes, since this is granularity
3288          * of interpreter stack size
3289          */
3290         depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3291         if (depth > MAX_BPF_STACK) {
3292                 verbose(env, "combined stack size of %d calls is %d. Too large\n",
3293                         frame + 1, depth);
3294                 return -EACCES;
3295         }
3296 continue_func:
3297         subprog_end = subprog[idx + 1].start;
3298         for (; i < subprog_end; i++) {
3299                 if (insn[i].code != (BPF_JMP | BPF_CALL))
3300                         continue;
3301                 if (insn[i].src_reg != BPF_PSEUDO_CALL)
3302                         continue;
3303                 /* remember insn and function to return to */
3304                 ret_insn[frame] = i + 1;
3305                 ret_prog[frame] = idx;
3306
3307                 /* find the callee */
3308                 i = i + insn[i].imm + 1;
3309                 idx = find_subprog(env, i);
3310                 if (idx < 0) {
3311                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3312                                   i);
3313                         return -EFAULT;
3314                 }
3315
3316                 if (subprog[idx].has_tail_call)
3317                         tail_call_reachable = true;
3318
3319                 frame++;
3320                 if (frame >= MAX_CALL_FRAMES) {
3321                         verbose(env, "the call stack of %d frames is too deep !\n",
3322                                 frame);
3323                         return -E2BIG;
3324                 }
3325                 goto process_func;
3326         }
3327         /* if tail call got detected across bpf2bpf calls then mark each of the
3328          * currently present subprog frames as tail call reachable subprogs;
3329          * this info will be utilized by JIT so that we will be preserving the
3330          * tail call counter throughout bpf2bpf calls combined with tailcalls
3331          */
3332         if (tail_call_reachable)
3333                 for (j = 0; j < frame; j++)
3334                         subprog[ret_prog[j]].tail_call_reachable = true;
3335         if (subprog[0].tail_call_reachable)
3336                 env->prog->aux->tail_call_reachable = true;
3337
3338         /* end of for() loop means the last insn of the 'subprog'
3339          * was reached. Doesn't matter whether it was JA or EXIT
3340          */
3341         if (frame == 0)
3342                 return 0;
3343         depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3344         frame--;
3345         i = ret_insn[frame];
3346         idx = ret_prog[frame];
3347         goto continue_func;
3348 }
3349
3350 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
3351 static int get_callee_stack_depth(struct bpf_verifier_env *env,
3352                                   const struct bpf_insn *insn, int idx)
3353 {
3354         int start = idx + insn->imm + 1, subprog;
3355
3356         subprog = find_subprog(env, start);
3357         if (subprog < 0) {
3358                 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3359                           start);
3360                 return -EFAULT;
3361         }
3362         return env->subprog_info[subprog].stack_depth;
3363 }
3364 #endif
3365
3366 int check_ctx_reg(struct bpf_verifier_env *env,
3367                   const struct bpf_reg_state *reg, int regno)
3368 {
3369         /* Access to ctx or passing it to a helper is only allowed in
3370          * its original, unmodified form.
3371          */
3372
3373         if (reg->off) {
3374                 verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
3375                         regno, reg->off);
3376                 return -EACCES;
3377         }
3378
3379         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3380                 char tn_buf[48];
3381
3382                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3383                 verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
3384                 return -EACCES;
3385         }
3386
3387         return 0;
3388 }
3389
3390 static int __check_buffer_access(struct bpf_verifier_env *env,
3391                                  const char *buf_info,
3392                                  const struct bpf_reg_state *reg,
3393                                  int regno, int off, int size)
3394 {
3395         if (off < 0) {
3396                 verbose(env,
3397                         "R%d invalid %s buffer access: off=%d, size=%d\n",
3398                         regno, buf_info, off, size);
3399                 return -EACCES;
3400         }
3401         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3402                 char tn_buf[48];
3403
3404                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3405                 verbose(env,
3406                         "R%d invalid variable buffer offset: off=%d, var_off=%s\n",
3407                         regno, off, tn_buf);
3408                 return -EACCES;
3409         }
3410
3411         return 0;
3412 }
3413
3414 static int check_tp_buffer_access(struct bpf_verifier_env *env,
3415                                   const struct bpf_reg_state *reg,
3416                                   int regno, int off, int size)
3417 {
3418         int err;
3419
3420         err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3421         if (err)
3422                 return err;
3423
3424         if (off + size > env->prog->aux->max_tp_access)
3425                 env->prog->aux->max_tp_access = off + size;
3426
3427         return 0;
3428 }
3429
3430 static int check_buffer_access(struct bpf_verifier_env *env,
3431                                const struct bpf_reg_state *reg,
3432                                int regno, int off, int size,
3433                                bool zero_size_allowed,
3434                                const char *buf_info,
3435                                u32 *max_access)
3436 {
3437         int err;
3438
3439         err = __check_buffer_access(env, buf_info, reg, regno, off, size);
3440         if (err)
3441                 return err;
3442
3443         if (off + size > *max_access)
3444                 *max_access = off + size;
3445
3446         return 0;
3447 }
3448
3449 /* BPF architecture zero extends alu32 ops into 64-bit registesr */
3450 static void zext_32_to_64(struct bpf_reg_state *reg)
3451 {
3452         reg->var_off = tnum_subreg(reg->var_off);
3453         __reg_assign_32_into_64(reg);
3454 }
3455
3456 /* truncate register to smaller size (in bytes)
3457  * must be called with size < BPF_REG_SIZE
3458  */
3459 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
3460 {
3461         u64 mask;
3462
3463         /* clear high bits in bit representation */
3464         reg->var_off = tnum_cast(reg->var_off, size);
3465
3466         /* fix arithmetic bounds */
3467         mask = ((u64)1 << (size * 8)) - 1;
3468         if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
3469                 reg->umin_value &= mask;
3470                 reg->umax_value &= mask;
3471         } else {
3472                 reg->umin_value = 0;
3473                 reg->umax_value = mask;
3474         }
3475         reg->smin_value = reg->umin_value;
3476         reg->smax_value = reg->umax_value;
3477
3478         /* If size is smaller than 32bit register the 32bit register
3479          * values are also truncated so we push 64-bit bounds into
3480          * 32-bit bounds. Above were truncated < 32-bits already.
3481          */
3482         if (size >= 4)
3483                 return;
3484         __reg_combine_64_into_32(reg);
3485 }
3486
3487 static bool bpf_map_is_rdonly(const struct bpf_map *map)
3488 {
3489         return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
3490 }
3491
3492 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
3493 {
3494         void *ptr;
3495         u64 addr;
3496         int err;
3497
3498         err = map->ops->map_direct_value_addr(map, &addr, off);
3499         if (err)
3500                 return err;
3501         ptr = (void *)(long)addr + off;
3502
3503         switch (size) {
3504         case sizeof(u8):
3505                 *val = (u64)*(u8 *)ptr;
3506                 break;
3507         case sizeof(u16):
3508                 *val = (u64)*(u16 *)ptr;
3509                 break;
3510         case sizeof(u32):
3511                 *val = (u64)*(u32 *)ptr;
3512                 break;
3513         case sizeof(u64):
3514                 *val = *(u64 *)ptr;
3515                 break;
3516         default:
3517                 return -EINVAL;
3518         }
3519         return 0;
3520 }
3521
3522 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
3523                                    struct bpf_reg_state *regs,
3524                                    int regno, int off, int size,
3525                                    enum bpf_access_type atype,
3526                                    int value_regno)
3527 {
3528         struct bpf_reg_state *reg = regs + regno;
3529         const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
3530         const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3531         u32 btf_id;
3532         int ret;
3533
3534         if (off < 0) {
3535                 verbose(env,
3536                         "R%d is ptr_%s invalid negative access: off=%d\n",
3537                         regno, tname, off);
3538                 return -EACCES;
3539         }
3540         if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3541                 char tn_buf[48];
3542
3543                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3544                 verbose(env,
3545                         "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
3546                         regno, tname, off, tn_buf);
3547                 return -EACCES;
3548         }
3549
3550         if (env->ops->btf_struct_access) {
3551                 ret = env->ops->btf_struct_access(&env->log, t, off, size,
3552                                                   atype, &btf_id);
3553         } else {
3554                 if (atype != BPF_READ) {
3555                         verbose(env, "only read is supported\n");
3556                         return -EACCES;
3557                 }
3558
3559                 ret = btf_struct_access(&env->log, t, off, size, atype,
3560                                         &btf_id);
3561         }
3562
3563         if (ret < 0)
3564                 return ret;
3565
3566         if (atype == BPF_READ && value_regno >= 0)
3567                 mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3568
3569         return 0;
3570 }
3571
3572 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
3573                                    struct bpf_reg_state *regs,
3574                                    int regno, int off, int size,
3575                                    enum bpf_access_type atype,
3576                                    int value_regno)
3577 {
3578         struct bpf_reg_state *reg = regs + regno;
3579         struct bpf_map *map = reg->map_ptr;
3580         const struct btf_type *t;
3581         const char *tname;
3582         u32 btf_id;
3583         int ret;
3584
3585         if (!btf_vmlinux) {
3586                 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
3587                 return -ENOTSUPP;
3588         }
3589
3590         if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
3591                 verbose(env, "map_ptr access not supported for map type %d\n",
3592                         map->map_type);
3593                 return -ENOTSUPP;
3594         }
3595
3596         t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
3597         tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3598
3599         if (!env->allow_ptr_to_map_access) {
3600                 verbose(env,
3601                         "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
3602                         tname);
3603                 return -EPERM;
3604         }
3605
3606         if (off < 0) {
3607                 verbose(env, "R%d is %s invalid negative access: off=%d\n",
3608                         regno, tname, off);
3609                 return -EACCES;
3610         }
3611
3612         if (atype != BPF_READ) {
3613                 verbose(env, "only read from %s is supported\n", tname);
3614                 return -EACCES;
3615         }
3616
3617         ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3618         if (ret < 0)
3619                 return ret;
3620
3621         if (value_regno >= 0)
3622                 mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3623
3624         return 0;
3625 }
3626
3627 /* Check that the stack access at the given offset is within bounds. The
3628  * maximum valid offset is -1.
3629  *
3630  * The minimum valid offset is -MAX_BPF_STACK for writes, and
3631  * -state->allocated_stack for reads.
3632  */
3633 static int check_stack_slot_within_bounds(int off,
3634                                           struct bpf_func_state *state,
3635                                           enum bpf_access_type t)
3636 {
3637         int min_valid_off;
3638
3639         if (t == BPF_WRITE)
3640                 min_valid_off = -MAX_BPF_STACK;
3641         else
3642                 min_valid_off = -state->allocated_stack;
3643
3644         if (off < min_valid_off || off > -1)
3645                 return -EACCES;
3646         return 0;
3647 }
3648
3649 /* Check that the stack access at 'regno + off' falls within the maximum stack
3650  * bounds.
3651  *
3652  * 'off' includes `regno->offset`, but not its dynamic part (if any).
3653  */
3654 static int check_stack_access_within_bounds(
3655                 struct bpf_verifier_env *env,
3656                 int regno, int off, int access_size,
3657                 enum stack_access_src src, enum bpf_access_type type)
3658 {
3659         struct bpf_reg_state *regs = cur_regs(env);
3660         struct bpf_reg_state *reg = regs + regno;
3661         struct bpf_func_state *state = func(env, reg);
3662         int min_off, max_off;
3663         int err;
3664         char *err_extra;
3665
3666         if (src == ACCESS_HELPER)
3667                 /* We don't know if helpers are reading or writing (or both). */
3668                 err_extra = " indirect access to";
3669         else if (type == BPF_READ)
3670                 err_extra = " read from";
3671         else
3672                 err_extra = " write to";
3673
3674         if (tnum_is_const(reg->var_off)) {
3675                 min_off = reg->var_off.value + off;
3676                 if (access_size > 0)
3677                         max_off = min_off + access_size - 1;
3678                 else
3679                         max_off = min_off;
3680         } else {
3681                 if (reg->smax_value >= BPF_MAX_VAR_OFF ||
3682                     reg->smin_value <= -BPF_MAX_VAR_OFF) {
3683                         verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
3684                                 err_extra, regno);
3685                         return -EACCES;
3686                 }
3687                 min_off = reg->smin_value + off;
3688                 if (access_size > 0)
3689                         max_off = reg->smax_value + off + access_size - 1;
3690                 else
3691                         max_off = min_off;
3692         }
3693
3694         err = check_stack_slot_within_bounds(min_off, state, type);
3695         if (!err)
3696                 err = check_stack_slot_within_bounds(max_off, state, type);
3697
3698         if (err) {
3699                 if (tnum_is_const(reg->var_off)) {
3700                         verbose(env, "invalid%s stack R%d off=%d size=%d\n",
3701                                 err_extra, regno, off, access_size);
3702                 } else {
3703                         char tn_buf[48];
3704
3705                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3706                         verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
3707                                 err_extra, regno, tn_buf, access_size);
3708                 }
3709         }
3710         return err;
3711 }
3712
3713 /* check whether memory at (regno + off) is accessible for t = (read | write)
3714  * if t==write, value_regno is a register which value is stored into memory
3715  * if t==read, value_regno is a register which will receive the value from memory
3716  * if t==write && value_regno==-1, some unknown value is stored into memory
3717  * if t==read && value_regno==-1, don't care what we read from memory
3718  */
3719 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
3720                             int off, int bpf_size, enum bpf_access_type t,
3721                             int value_regno, bool strict_alignment_once)
3722 {
3723         struct bpf_reg_state *regs = cur_regs(env);
3724         struct bpf_reg_state *reg = regs + regno;
3725         struct bpf_func_state *state;
3726         int size, err = 0;
3727
3728         size = bpf_size_to_bytes(bpf_size);
3729         if (size < 0)
3730                 return size;
3731
3732         /* alignment checks will add in reg->off themselves */
3733         err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
3734         if (err)
3735                 return err;
3736
3737         /* for access checks, reg->off is just part of off */
3738         off += reg->off;
3739
3740         if (reg->type == PTR_TO_MAP_VALUE) {
3741                 if (t == BPF_WRITE && value_regno >= 0 &&
3742                     is_pointer_value(env, value_regno)) {
3743                         verbose(env, "R%d leaks addr into map\n", value_regno);
3744                         return -EACCES;
3745                 }
3746                 err = check_map_access_type(env, regno, off, size, t);
3747                 if (err)
3748                         return err;
3749                 err = check_map_access(env, regno, off, size, false);
3750                 if (!err && t == BPF_READ && value_regno >= 0) {
3751                         struct bpf_map *map = reg->map_ptr;
3752
3753                         /* if map is read-only, track its contents as scalars */
3754                         if (tnum_is_const(reg->var_off) &&
3755                             bpf_map_is_rdonly(map) &&
3756                             map->ops->map_direct_value_addr) {
3757                                 int map_off = off + reg->var_off.value;
3758                                 u64 val = 0;
3759
3760                                 err = bpf_map_direct_read(map, map_off, size,
3761                                                           &val);
3762                                 if (err)
3763                                         return err;
3764
3765                                 regs[value_regno].type = SCALAR_VALUE;
3766                                 __mark_reg_known(&regs[value_regno], val);
3767                         } else {
3768                                 mark_reg_unknown(env, regs, value_regno);
3769                         }
3770                 }
3771         } else if (reg->type == PTR_TO_MEM) {
3772                 if (t == BPF_WRITE && value_regno >= 0 &&
3773                     is_pointer_value(env, value_regno)) {
3774                         verbose(env, "R%d leaks addr into mem\n", value_regno);
3775                         return -EACCES;
3776                 }
3777                 err = check_mem_region_access(env, regno, off, size,
3778                                               reg->mem_size, false);
3779                 if (!err && t == BPF_READ && value_regno >= 0)
3780                         mark_reg_unknown(env, regs, value_regno);
3781         } else if (reg->type == PTR_TO_CTX) {
3782                 enum bpf_reg_type reg_type = SCALAR_VALUE;
3783                 u32 btf_id = 0;
3784
3785                 if (t == BPF_WRITE && value_regno >= 0 &&
3786                     is_pointer_value(env, value_regno)) {
3787                         verbose(env, "R%d leaks addr into ctx\n", value_regno);
3788                         return -EACCES;
3789                 }
3790
3791                 err = check_ctx_reg(env, reg, regno);
3792                 if (err < 0)
3793                         return err;
3794
3795                 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
3796                 if (err)
3797                         verbose_linfo(env, insn_idx, "; ");
3798                 if (!err && t == BPF_READ && value_regno >= 0) {
3799                         /* ctx access returns either a scalar, or a
3800                          * PTR_TO_PACKET[_META,_END]. In the latter
3801                          * case, we know the offset is zero.
3802                          */
3803                         if (reg_type == SCALAR_VALUE) {
3804                                 mark_reg_unknown(env, regs, value_regno);
3805                         } else {
3806                                 mark_reg_known_zero(env, regs,
3807                                                     value_regno);
3808                                 if (reg_type_may_be_null(reg_type))
3809                                         regs[value_regno].id = ++env->id_gen;
3810                                 /* A load of ctx field could have different
3811                                  * actual load size with the one encoded in the
3812                                  * insn. When the dst is PTR, it is for sure not
3813                                  * a sub-register.
3814                                  */
3815                                 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
3816                                 if (reg_type == PTR_TO_BTF_ID ||
3817                                     reg_type == PTR_TO_BTF_ID_OR_NULL)
3818                                         regs[value_regno].btf_id = btf_id;
3819                         }
3820                         regs[value_regno].type = reg_type;
3821                 }
3822
3823         } else if (reg->type == PTR_TO_STACK) {
3824                 /* Basic bounds checks. */
3825                 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
3826                 if (err)
3827                         return err;
3828
3829                 state = func(env, reg);
3830                 err = update_stack_depth(env, state, off);
3831                 if (err)
3832                         return err;
3833
3834                 if (t == BPF_READ)
3835                         err = check_stack_read(env, regno, off, size,
3836                                                value_regno);
3837                 else
3838                         err = check_stack_write(env, regno, off, size,
3839                                                 value_regno, insn_idx);
3840         } else if (reg_is_pkt_pointer(reg)) {
3841                 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
3842                         verbose(env, "cannot write into packet\n");
3843                         return -EACCES;
3844                 }
3845                 if (t == BPF_WRITE && value_regno >= 0 &&
3846                     is_pointer_value(env, value_regno)) {
3847                         verbose(env, "R%d leaks addr into packet\n",
3848                                 value_regno);
3849                         return -EACCES;
3850                 }
3851                 err = check_packet_access(env, regno, off, size, false);
3852                 if (!err && t == BPF_READ && value_regno >= 0)
3853                         mark_reg_unknown(env, regs, value_regno);
3854         } else if (reg->type == PTR_TO_FLOW_KEYS) {
3855                 if (t == BPF_WRITE && value_regno >= 0 &&
3856                     is_pointer_value(env, value_regno)) {
3857                         verbose(env, "R%d leaks addr into flow keys\n",
3858                                 value_regno);
3859                         return -EACCES;
3860                 }
3861
3862                 err = check_flow_keys_access(env, off, size);
3863                 if (!err && t == BPF_READ && value_regno >= 0)
3864                         mark_reg_unknown(env, regs, value_regno);
3865         } else if (type_is_sk_pointer(reg->type)) {
3866                 if (t == BPF_WRITE) {
3867                         verbose(env, "R%d cannot write into %s\n",
3868                                 regno, reg_type_str[reg->type]);
3869                         return -EACCES;
3870                 }
3871                 err = check_sock_access(env, insn_idx, regno, off, size, t);
3872                 if (!err && value_regno >= 0)
3873                         mark_reg_unknown(env, regs, value_regno);
3874         } else if (reg->type == PTR_TO_TP_BUFFER) {
3875                 err = check_tp_buffer_access(env, reg, regno, off, size);
3876                 if (!err && t == BPF_READ && value_regno >= 0)
3877                         mark_reg_unknown(env, regs, value_regno);
3878         } else if (reg->type == PTR_TO_BTF_ID) {
3879                 err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
3880                                               value_regno);
3881         } else if (reg->type == CONST_PTR_TO_MAP) {
3882                 err = check_ptr_to_map_access(env, regs, regno, off, size, t,
3883                                               value_regno);
3884         } else if (reg->type == PTR_TO_RDONLY_BUF) {
3885                 if (t == BPF_WRITE) {
3886                         verbose(env, "R%d cannot write into %s\n",
3887                                 regno, reg_type_str[reg->type]);
3888                         return -EACCES;
3889                 }
3890                 err = check_buffer_access(env, reg, regno, off, size, false,
3891                                           "rdonly",
3892                                           &env->prog->aux->max_rdonly_access);
3893                 if (!err && value_regno >= 0)
3894                         mark_reg_unknown(env, regs, value_regno);
3895         } else if (reg->type == PTR_TO_RDWR_BUF) {
3896                 err = check_buffer_access(env, reg, regno, off, size, false,
3897                                           "rdwr",
3898                                           &env->prog->aux->max_rdwr_access);
3899                 if (!err && t == BPF_READ && value_regno >= 0)
3900                         mark_reg_unknown(env, regs, value_regno);
3901         } else {
3902                 verbose(env, "R%d invalid mem access '%s'\n", regno,
3903                         reg_type_str[reg->type]);
3904                 return -EACCES;
3905         }
3906
3907         if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
3908             regs[value_regno].type == SCALAR_VALUE) {
3909                 /* b/h/w load zero-extends, mark upper bits as known 0 */
3910                 coerce_reg_to_size(&regs[value_regno], size);
3911         }
3912         return err;
3913 }
3914
3915 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
3916 {
3917         int err;
3918
3919         if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
3920             insn->imm != 0) {
3921                 verbose(env, "BPF_XADD uses reserved fields\n");
3922                 return -EINVAL;
3923         }
3924
3925         /* check src1 operand */
3926         err = check_reg_arg(env, insn->src_reg, SRC_OP);
3927         if (err)
3928                 return err;
3929
3930         /* check src2 operand */
3931         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3932         if (err)
3933                 return err;
3934
3935         if (is_pointer_value(env, insn->src_reg)) {
3936                 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
3937                 return -EACCES;
3938         }
3939
3940         if (is_ctx_reg(env, insn->dst_reg) ||
3941             is_pkt_reg(env, insn->dst_reg) ||
3942             is_flow_key_reg(env, insn->dst_reg) ||
3943             is_sk_reg(env, insn->dst_reg)) {
3944                 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
3945                         insn->dst_reg,
3946                         reg_type_str[reg_state(env, insn->dst_reg)->type]);
3947                 return -EACCES;
3948         }
3949
3950         /* check whether atomic_add can read the memory */
3951         err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3952                                BPF_SIZE(insn->code), BPF_READ, -1, true);
3953         if (err)
3954                 return err;
3955
3956         /* check whether atomic_add can write into the same memory */
3957         return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3958                                 BPF_SIZE(insn->code), BPF_WRITE, -1, true);
3959 }
3960
3961 /* When register 'regno' is used to read the stack (either directly or through
3962  * a helper function) make sure that it's within stack boundary and, depending
3963  * on the access type, that all elements of the stack are initialized.
3964  *
3965  * 'off' includes 'regno->off', but not its dynamic part (if any).
3966  *
3967  * All registers that have been spilled on the stack in the slots within the
3968  * read offsets are marked as read.
3969  */
3970 static int check_stack_range_initialized(
3971                 struct bpf_verifier_env *env, int regno, int off,
3972                 int access_size, bool zero_size_allowed,
3973                 enum stack_access_src type, struct bpf_call_arg_meta *meta)
3974 {
3975         struct bpf_reg_state *reg = reg_state(env, regno);
3976         struct bpf_func_state *state = func(env, reg);
3977         int err, min_off, max_off, i, j, slot, spi;
3978         char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
3979         enum bpf_access_type bounds_check_type;
3980         /* Some accesses can write anything into the stack, others are
3981          * read-only.
3982          */
3983         bool clobber = false;
3984
3985         if (access_size == 0 && !zero_size_allowed) {
3986                 verbose(env, "invalid zero-sized read\n");
3987                 return -EACCES;
3988         }
3989
3990         if (type == ACCESS_HELPER) {
3991                 /* The bounds checks for writes are more permissive than for
3992                  * reads. However, if raw_mode is not set, we'll do extra
3993                  * checks below.
3994                  */
3995                 bounds_check_type = BPF_WRITE;
3996                 clobber = true;
3997         } else {
3998                 bounds_check_type = BPF_READ;
3999         }
4000         err = check_stack_access_within_bounds(env, regno, off, access_size,
4001                                                type, bounds_check_type);
4002         if (err)
4003                 return err;
4004
4005
4006         if (tnum_is_const(reg->var_off)) {
4007                 min_off = max_off = reg->var_off.value + off;
4008         } else {
4009                 /* Variable offset is prohibited for unprivileged mode for
4010                  * simplicity since it requires corresponding support in
4011                  * Spectre masking for stack ALU.
4012                  * See also retrieve_ptr_limit().
4013                  */
4014                 if (!env->bypass_spec_v1) {
4015                         char tn_buf[48];
4016
4017                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4018                         verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4019                                 regno, err_extra, tn_buf);
4020                         return -EACCES;
4021                 }
4022                 /* Only initialized buffer on stack is allowed to be accessed
4023                  * with variable offset. With uninitialized buffer it's hard to
4024                  * guarantee that whole memory is marked as initialized on
4025                  * helper return since specific bounds are unknown what may
4026                  * cause uninitialized stack leaking.
4027                  */
4028                 if (meta && meta->raw_mode)
4029                         meta = NULL;
4030
4031                 min_off = reg->smin_value + off;
4032                 max_off = reg->smax_value + off;
4033         }
4034
4035         if (meta && meta->raw_mode) {
4036                 meta->access_size = access_size;
4037                 meta->regno = regno;
4038                 return 0;
4039         }
4040
4041         for (i = min_off; i < max_off + access_size; i++) {
4042                 u8 *stype;
4043
4044                 slot = -i - 1;
4045                 spi = slot / BPF_REG_SIZE;
4046                 if (state->allocated_stack <= slot)
4047                         goto err;
4048                 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4049                 if (*stype == STACK_MISC)
4050                         goto mark;
4051                 if (*stype == STACK_ZERO) {
4052                         if (clobber) {
4053                                 /* helper can write anything into the stack */
4054                                 *stype = STACK_MISC;
4055                         }
4056                         goto mark;
4057                 }
4058
4059                 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4060                     state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
4061                         goto mark;
4062
4063                 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4064                     (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4065                      env->allow_ptr_leaks)) {
4066                         if (clobber) {
4067                                 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4068                                 for (j = 0; j < BPF_REG_SIZE; j++)
4069                                         state->stack[spi].slot_type[j] = STACK_MISC;
4070                         }
4071                         goto mark;
4072                 }
4073
4074 err:
4075                 if (tnum_is_const(reg->var_off)) {
4076                         verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4077                                 err_extra, regno, min_off, i - min_off, access_size);
4078                 } else {
4079                         char tn_buf[48];
4080
4081                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4082                         verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
4083                                 err_extra, regno, tn_buf, i - min_off, access_size);
4084                 }
4085                 return -EACCES;
4086 mark:
4087                 /* reading any byte out of 8-byte 'spill_slot' will cause
4088                  * the whole slot to be marked as 'read'
4089                  */
4090                 mark_reg_read(env, &state->stack[spi].spilled_ptr,
4091                               state->stack[spi].spilled_ptr.parent,
4092                               REG_LIVE_READ64);
4093         }
4094         return update_stack_depth(env, state, min_off);
4095 }
4096
4097 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
4098                                    int access_size, bool zero_size_allowed,
4099                                    struct bpf_call_arg_meta *meta)
4100 {
4101         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4102
4103         switch (reg->type) {
4104         case PTR_TO_PACKET:
4105         case PTR_TO_PACKET_META:
4106                 return check_packet_access(env, regno, reg->off, access_size,
4107                                            zero_size_allowed);
4108         case PTR_TO_MAP_VALUE:
4109                 if (check_map_access_type(env, regno, reg->off, access_size,
4110                                           meta && meta->raw_mode ? BPF_WRITE :
4111                                           BPF_READ))
4112                         return -EACCES;
4113                 return check_map_access(env, regno, reg->off, access_size,
4114                                         zero_size_allowed);
4115         case PTR_TO_MEM:
4116                 return check_mem_region_access(env, regno, reg->off,
4117                                                access_size, reg->mem_size,
4118                                                zero_size_allowed);
4119         case PTR_TO_RDONLY_BUF:
4120                 if (meta && meta->raw_mode)
4121                         return -EACCES;
4122                 return check_buffer_access(env, reg, regno, reg->off,
4123                                            access_size, zero_size_allowed,
4124                                            "rdonly",
4125                                            &env->prog->aux->max_rdonly_access);
4126         case PTR_TO_RDWR_BUF:
4127                 return check_buffer_access(env, reg, regno, reg->off,
4128                                            access_size, zero_size_allowed,
4129                                            "rdwr",
4130                                            &env->prog->aux->max_rdwr_access);
4131         case PTR_TO_STACK:
4132                 return check_stack_range_initialized(
4133                                 env,
4134                                 regno, reg->off, access_size,
4135                                 zero_size_allowed, ACCESS_HELPER, meta);
4136         default: /* scalar_value or invalid ptr */
4137                 /* Allow zero-byte read from NULL, regardless of pointer type */
4138                 if (zero_size_allowed && access_size == 0 &&
4139                     register_is_null(reg))
4140                         return 0;
4141
4142                 verbose(env, "R%d type=%s expected=%s\n", regno,
4143                         reg_type_str[reg->type],
4144                         reg_type_str[PTR_TO_STACK]);
4145                 return -EACCES;
4146         }
4147 }
4148
4149 /* Implementation details:
4150  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4151  * Two bpf_map_lookups (even with the same key) will have different reg->id.
4152  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4153  * value_or_null->value transition, since the verifier only cares about
4154  * the range of access to valid map value pointer and doesn't care about actual
4155  * address of the map element.
4156  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4157  * reg->id > 0 after value_or_null->value transition. By doing so
4158  * two bpf_map_lookups will be considered two different pointers that
4159  * point to different bpf_spin_locks.
4160  * The verifier allows taking only one bpf_spin_lock at a time to avoid
4161  * dead-locks.
4162  * Since only one bpf_spin_lock is allowed the checks are simpler than
4163  * reg_is_refcounted() logic. The verifier needs to remember only
4164  * one spin_lock instead of array of acquired_refs.
4165  * cur_state->active_spin_lock remembers which map value element got locked
4166  * and clears it after bpf_spin_unlock.
4167  */
4168 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
4169                              bool is_lock)
4170 {
4171         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4172         struct bpf_verifier_state *cur = env->cur_state;
4173         bool is_const = tnum_is_const(reg->var_off);
4174         struct bpf_map *map = reg->map_ptr;
4175         u64 val = reg->var_off.value;
4176
4177         if (!is_const) {
4178                 verbose(env,
4179                         "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
4180                         regno);
4181                 return -EINVAL;
4182         }
4183         if (!map->btf) {
4184                 verbose(env,
4185                         "map '%s' has to have BTF in order to use bpf_spin_lock\n",
4186                         map->name);
4187                 return -EINVAL;
4188         }
4189         if (!map_value_has_spin_lock(map)) {
4190                 if (map->spin_lock_off == -E2BIG)
4191                         verbose(env,
4192                                 "map '%s' has more than one 'struct bpf_spin_lock'\n",
4193                                 map->name);
4194                 else if (map->spin_lock_off == -ENOENT)
4195                         verbose(env,
4196                                 "map '%s' doesn't have 'struct bpf_spin_lock'\n",
4197                                 map->name);
4198                 else
4199                         verbose(env,
4200                                 "map '%s' is not a struct type or bpf_spin_lock is mangled\n",
4201                                 map->name);
4202                 return -EINVAL;
4203         }
4204         if (map->spin_lock_off != val + reg->off) {
4205                 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
4206                         val + reg->off);
4207                 return -EINVAL;
4208         }
4209         if (is_lock) {
4210                 if (cur->active_spin_lock) {
4211                         verbose(env,
4212                                 "Locking two bpf_spin_locks are not allowed\n");
4213                         return -EINVAL;
4214                 }
4215                 cur->active_spin_lock = reg->id;
4216         } else {
4217                 if (!cur->active_spin_lock) {
4218                         verbose(env, "bpf_spin_unlock without taking a lock\n");
4219                         return -EINVAL;
4220                 }
4221                 if (cur->active_spin_lock != reg->id) {
4222                         verbose(env, "bpf_spin_unlock of different lock\n");
4223                         return -EINVAL;
4224                 }
4225                 cur->active_spin_lock = 0;
4226         }
4227         return 0;
4228 }
4229
4230 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
4231 {
4232         return type == ARG_PTR_TO_MEM ||
4233                type == ARG_PTR_TO_MEM_OR_NULL ||
4234                type == ARG_PTR_TO_UNINIT_MEM;
4235 }
4236
4237 static bool arg_type_is_mem_size(enum bpf_arg_type type)
4238 {
4239         return type == ARG_CONST_SIZE ||
4240                type == ARG_CONST_SIZE_OR_ZERO;
4241 }
4242
4243 static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4244 {
4245         return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4246 }
4247
4248 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4249 {
4250         return type == ARG_PTR_TO_INT ||
4251                type == ARG_PTR_TO_LONG;
4252 }
4253
4254 static int int_ptr_type_to_size(enum bpf_arg_type type)
4255 {
4256         if (type == ARG_PTR_TO_INT)
4257                 return sizeof(u32);
4258         else if (type == ARG_PTR_TO_LONG)
4259                 return sizeof(u64);
4260
4261         return -EINVAL;
4262 }
4263
4264 static int resolve_map_arg_type(struct bpf_verifier_env *env,
4265                                  const struct bpf_call_arg_meta *meta,
4266                                  enum bpf_arg_type *arg_type)
4267 {
4268         if (!meta->map_ptr) {
4269                 /* kernel subsystem misconfigured verifier */
4270                 verbose(env, "invalid map_ptr to access map->type\n");
4271                 return -EACCES;
4272         }
4273
4274         switch (meta->map_ptr->map_type) {
4275         case BPF_MAP_TYPE_SOCKMAP:
4276         case BPF_MAP_TYPE_SOCKHASH:
4277                 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
4278                         *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
4279                 } else {
4280                         verbose(env, "invalid arg_type for sockmap/sockhash\n");
4281                         return -EINVAL;
4282                 }
4283                 break;
4284
4285         default:
4286                 break;
4287         }
4288         return 0;
4289 }
4290
4291 struct bpf_reg_types {
4292         const enum bpf_reg_type types[10];
4293         u32 *btf_id;
4294 };
4295
4296 static const struct bpf_reg_types map_key_value_types = {
4297         .types = {
4298                 PTR_TO_STACK,
4299                 PTR_TO_PACKET,
4300                 PTR_TO_PACKET_META,
4301                 PTR_TO_MAP_VALUE,
4302         },
4303 };
4304
4305 static const struct bpf_reg_types sock_types = {
4306         .types = {
4307                 PTR_TO_SOCK_COMMON,
4308                 PTR_TO_SOCKET,
4309                 PTR_TO_TCP_SOCK,
4310                 PTR_TO_XDP_SOCK,
4311         },
4312 };
4313
4314 #ifdef CONFIG_NET
4315 static const struct bpf_reg_types btf_id_sock_common_types = {
4316         .types = {
4317                 PTR_TO_SOCK_COMMON,
4318                 PTR_TO_SOCKET,
4319                 PTR_TO_TCP_SOCK,
4320                 PTR_TO_XDP_SOCK,
4321                 PTR_TO_BTF_ID,
4322         },
4323         .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
4324 };
4325 #endif
4326
4327 static const struct bpf_reg_types mem_types = {
4328         .types = {
4329                 PTR_TO_STACK,
4330                 PTR_TO_PACKET,
4331                 PTR_TO_PACKET_META,
4332                 PTR_TO_MAP_VALUE,
4333                 PTR_TO_MEM,
4334                 PTR_TO_RDONLY_BUF,
4335                 PTR_TO_RDWR_BUF,
4336         },
4337 };
4338
4339 static const struct bpf_reg_types int_ptr_types = {
4340         .types = {
4341                 PTR_TO_STACK,
4342                 PTR_TO_PACKET,
4343                 PTR_TO_PACKET_META,
4344                 PTR_TO_MAP_VALUE,
4345         },
4346 };
4347
4348 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
4349 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
4350 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
4351 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
4352 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
4353 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
4354 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
4355 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
4356
4357 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
4358         [ARG_PTR_TO_MAP_KEY]            = &map_key_value_types,
4359         [ARG_PTR_TO_MAP_VALUE]          = &map_key_value_types,
4360         [ARG_PTR_TO_UNINIT_MAP_VALUE]   = &map_key_value_types,
4361         [ARG_PTR_TO_MAP_VALUE_OR_NULL]  = &map_key_value_types,
4362         [ARG_CONST_SIZE]                = &scalar_types,
4363         [ARG_CONST_SIZE_OR_ZERO]        = &scalar_types,
4364         [ARG_CONST_ALLOC_SIZE_OR_ZERO]  = &scalar_types,
4365         [ARG_CONST_MAP_PTR]             = &const_map_ptr_types,
4366         [ARG_PTR_TO_CTX]                = &context_types,
4367         [ARG_PTR_TO_CTX_OR_NULL]        = &context_types,
4368         [ARG_PTR_TO_SOCK_COMMON]        = &sock_types,
4369 #ifdef CONFIG_NET
4370         [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
4371 #endif
4372         [ARG_PTR_TO_SOCKET]             = &fullsock_types,
4373         [ARG_PTR_TO_SOCKET_OR_NULL]     = &fullsock_types,
4374         [ARG_PTR_TO_BTF_ID]             = &btf_ptr_types,
4375         [ARG_PTR_TO_SPIN_LOCK]          = &spin_lock_types,
4376         [ARG_PTR_TO_MEM]                = &mem_types,
4377         [ARG_PTR_TO_MEM_OR_NULL]        = &mem_types,
4378         [ARG_PTR_TO_UNINIT_MEM]         = &mem_types,
4379         [ARG_PTR_TO_ALLOC_MEM]          = &alloc_mem_types,
4380         [ARG_PTR_TO_ALLOC_MEM_OR_NULL]  = &alloc_mem_types,
4381         [ARG_PTR_TO_INT]                = &int_ptr_types,
4382         [ARG_PTR_TO_LONG]               = &int_ptr_types,
4383         [ARG_PTR_TO_PERCPU_BTF_ID]      = &percpu_btf_ptr_types,
4384 };
4385
4386 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
4387                           enum bpf_arg_type arg_type,
4388                           const u32 *arg_btf_id)
4389 {
4390         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4391         enum bpf_reg_type expected, type = reg->type;
4392         const struct bpf_reg_types *compatible;
4393         int i, j;
4394
4395         compatible = compatible_reg_types[arg_type];
4396         if (!compatible) {
4397                 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
4398                 return -EFAULT;
4399         }
4400
4401         for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
4402                 expected = compatible->types[i];
4403                 if (expected == NOT_INIT)
4404                         break;
4405
4406                 if (type == expected)
4407                         goto found;
4408         }
4409
4410         verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
4411         for (j = 0; j + 1 < i; j++)
4412                 verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
4413         verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
4414         return -EACCES;
4415
4416 found:
4417         if (type == PTR_TO_BTF_ID) {
4418                 if (!arg_btf_id) {
4419                         if (!compatible->btf_id) {
4420                                 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
4421                                 return -EFAULT;
4422                         }
4423                         arg_btf_id = compatible->btf_id;
4424                 }
4425
4426                 if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
4427                                           *arg_btf_id)) {
4428                         verbose(env, "R%d is of type %s but %s is expected\n",
4429                                 regno, kernel_type_name(reg->btf_id),
4430                                 kernel_type_name(*arg_btf_id));
4431                         return -EACCES;
4432                 }
4433
4434                 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4435                         verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
4436                                 regno);
4437                         return -EACCES;
4438                 }
4439         }
4440
4441         return 0;
4442 }
4443
4444 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
4445                           struct bpf_call_arg_meta *meta,
4446                           const struct bpf_func_proto *fn)
4447 {
4448         u32 regno = BPF_REG_1 + arg;
4449         struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4450         enum bpf_arg_type arg_type = fn->arg_type[arg];
4451         enum bpf_reg_type type = reg->type;
4452         int err = 0;
4453
4454         if (arg_type == ARG_DONTCARE)
4455                 return 0;
4456
4457         err = check_reg_arg(env, regno, SRC_OP);
4458         if (err)
4459                 return err;
4460
4461         if (arg_type == ARG_ANYTHING) {
4462                 if (is_pointer_value(env, regno)) {
4463                         verbose(env, "R%d leaks addr into helper function\n",
4464                                 regno);
4465                         return -EACCES;
4466                 }
4467                 return 0;
4468         }
4469
4470         if (type_is_pkt_pointer(type) &&
4471             !may_access_direct_pkt_data(env, meta, BPF_READ)) {
4472                 verbose(env, "helper access to the packet is not allowed\n");
4473                 return -EACCES;
4474         }
4475
4476         if (arg_type == ARG_PTR_TO_MAP_VALUE ||
4477             arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
4478             arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
4479                 err = resolve_map_arg_type(env, meta, &arg_type);
4480                 if (err)
4481                         return err;
4482         }
4483
4484         if (register_is_null(reg) && arg_type_may_be_null(arg_type))
4485                 /* A NULL register has a SCALAR_VALUE type, so skip
4486                  * type checking.
4487                  */
4488                 goto skip_type_check;
4489
4490         err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
4491         if (err)
4492                 return err;
4493
4494         if (type == PTR_TO_CTX) {
4495                 err = check_ctx_reg(env, reg, regno);
4496                 if (err < 0)
4497                         return err;
4498         }
4499
4500 skip_type_check:
4501         if (reg->ref_obj_id) {
4502                 if (meta->ref_obj_id) {
4503                         verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
4504                                 regno, reg->ref_obj_id,
4505                                 meta->ref_obj_id);
4506                         return -EFAULT;
4507                 }
4508                 meta->ref_obj_id = reg->ref_obj_id;
4509         }
4510
4511         if (arg_type == ARG_CONST_MAP_PTR) {
4512                 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
4513                 meta->map_ptr = reg->map_ptr;
4514         } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
4515                 /* bpf_map_xxx(..., map_ptr, ..., key) call:
4516                  * check that [key, key + map->key_size) are within
4517                  * stack limits and initialized
4518                  */
4519                 if (!meta->map_ptr) {
4520                         /* in function declaration map_ptr must come before
4521                          * map_key, so that it's verified and known before
4522                          * we have to check map_key here. Otherwise it means
4523                          * that kernel subsystem misconfigured verifier
4524                          */
4525                         verbose(env, "invalid map_ptr to access map->key\n");
4526                         return -EACCES;
4527                 }
4528                 err = check_helper_mem_access(env, regno,
4529                                               meta->map_ptr->key_size, false,
4530                                               NULL);
4531         } else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
4532                    (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
4533                     !register_is_null(reg)) ||
4534                    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
4535                 /* bpf_map_xxx(..., map_ptr, ..., value) call:
4536                  * check [value, value + map->value_size) validity
4537                  */
4538                 if (!meta->map_ptr) {
4539                         /* kernel subsystem misconfigured verifier */
4540                         verbose(env, "invalid map_ptr to access map->value\n");
4541                         return -EACCES;
4542                 }
4543                 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
4544                 err = check_helper_mem_access(env, regno,
4545                                               meta->map_ptr->value_size, false,
4546                                               meta);
4547         } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
4548                 if (!reg->btf_id) {
4549                         verbose(env, "Helper has invalid btf_id in R%d\n", regno);
4550                         return -EACCES;
4551                 }
4552                 meta->ret_btf_id = reg->btf_id;
4553         } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
4554                 if (meta->func_id == BPF_FUNC_spin_lock) {
4555                         if (process_spin_lock(env, regno, true))
4556                                 return -EACCES;
4557                 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
4558                         if (process_spin_lock(env, regno, false))
4559                                 return -EACCES;
4560                 } else {
4561                         verbose(env, "verifier internal error\n");
4562                         return -EFAULT;
4563                 }
4564         } else if (arg_type_is_mem_ptr(arg_type)) {
4565                 /* The access to this pointer is only checked when we hit the
4566                  * next is_mem_size argument below.
4567                  */
4568                 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
4569         } else if (arg_type_is_mem_size(arg_type)) {
4570                 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
4571
4572                 /* This is used to refine r0 return value bounds for helpers
4573                  * that enforce this value as an upper bound on return values.
4574                  * See do_refine_retval_range() for helpers that can refine
4575                  * the return value. C type of helper is u32 so we pull register
4576                  * bound from umax_value however, if negative verifier errors
4577                  * out. Only upper bounds can be learned because retval is an
4578                  * int type and negative retvals are allowed.
4579                  */
4580                 meta->msize_max_value = reg->umax_value;
4581
4582                 /* The register is SCALAR_VALUE; the access check
4583                  * happens using its boundaries.
4584                  */
4585                 if (!tnum_is_const(reg->var_off))
4586                         /* For unprivileged variable accesses, disable raw
4587                          * mode so that the program is required to
4588                          * initialize all the memory that the helper could
4589                          * just partially fill up.
4590                          */
4591                         meta = NULL;
4592
4593                 if (reg->smin_value < 0) {
4594                         verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
4595                                 regno);
4596                         return -EACCES;
4597                 }
4598
4599                 if (reg->umin_value == 0) {
4600                         err = check_helper_mem_access(env, regno - 1, 0,
4601                                                       zero_size_allowed,
4602                                                       meta);
4603                         if (err)
4604                                 return err;
4605                 }
4606
4607                 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
4608                         verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
4609                                 regno);
4610                         return -EACCES;
4611                 }
4612                 err = check_helper_mem_access(env, regno - 1,
4613                                               reg->umax_value,
4614                                               zero_size_allowed, meta);
4615                 if (!err)
4616                         err = mark_chain_precision(env, regno);
4617         } else if (arg_type_is_alloc_size(arg_type)) {
4618                 if (!tnum_is_const(reg->var_off)) {
4619                         verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
4620                                 regno);
4621                         return -EACCES;
4622                 }
4623                 meta->mem_size = reg->var_off.value;
4624         } else if (arg_type_is_int_ptr(arg_type)) {
4625                 int size = int_ptr_type_to_size(arg_type);
4626
4627                 err = check_helper_mem_access(env, regno, size, false, meta);
4628                 if (err)
4629                         return err;
4630                 err = check_ptr_alignment(env, reg, 0, size, true);
4631         }
4632
4633         return err;
4634 }
4635
4636 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
4637 {
4638         enum bpf_attach_type eatype = env->prog->expected_attach_type;
4639         enum bpf_prog_type type = resolve_prog_type(env->prog);
4640
4641         if (func_id != BPF_FUNC_map_update_elem)
4642                 return false;
4643
4644         /* It's not possible to get access to a locked struct sock in these
4645          * contexts, so updating is safe.
4646          */
4647         switch (type) {
4648         case BPF_PROG_TYPE_TRACING:
4649                 if (eatype == BPF_TRACE_ITER)
4650                         return true;
4651                 break;
4652         case BPF_PROG_TYPE_SOCKET_FILTER:
4653         case BPF_PROG_TYPE_SCHED_CLS:
4654         case BPF_PROG_TYPE_SCHED_ACT:
4655         case BPF_PROG_TYPE_XDP:
4656         case BPF_PROG_TYPE_SK_REUSEPORT:
4657         case BPF_PROG_TYPE_FLOW_DISSECTOR:
4658         case BPF_PROG_TYPE_SK_LOOKUP:
4659                 return true;
4660         default:
4661                 break;
4662         }
4663
4664         verbose(env, "cannot update sockmap in this context\n");
4665         return false;
4666 }
4667
4668 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
4669 {
4670         return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
4671 }
4672
4673 static int check_map_func_compatibility(struct bpf_verifier_env *env,
4674                                         struct bpf_map *map, int func_id)
4675 {
4676         if (!map)
4677                 return 0;
4678
4679         /* We need a two way check, first is from map perspective ... */
4680         switch (map->map_type) {
4681         case BPF_MAP_TYPE_PROG_ARRAY:
4682                 if (func_id != BPF_FUNC_tail_call)
4683                         goto error;
4684                 break;
4685         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4686                 if (func_id != BPF_FUNC_perf_event_read &&
4687                     func_id != BPF_FUNC_perf_event_output &&
4688                     func_id != BPF_FUNC_skb_output &&
4689                     func_id != BPF_FUNC_perf_event_read_value &&
4690                     func_id != BPF_FUNC_xdp_output)
4691                         goto error;
4692                 break;
4693         case BPF_MAP_TYPE_RINGBUF:
4694                 if (func_id != BPF_FUNC_ringbuf_output &&
4695                     func_id != BPF_FUNC_ringbuf_reserve &&
4696                     func_id != BPF_FUNC_ringbuf_query)
4697                         goto error;
4698                 break;
4699         case BPF_MAP_TYPE_STACK_TRACE:
4700                 if (func_id != BPF_FUNC_get_stackid)
4701                         goto error;
4702                 break;
4703         case BPF_MAP_TYPE_CGROUP_ARRAY:
4704                 if (func_id != BPF_FUNC_skb_under_cgroup &&
4705                     func_id != BPF_FUNC_current_task_under_cgroup)
4706                         goto error;
4707                 break;
4708         case BPF_MAP_TYPE_CGROUP_STORAGE:
4709         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
4710                 if (func_id != BPF_FUNC_get_local_storage)
4711                         goto error;
4712                 break;
4713         case BPF_MAP_TYPE_DEVMAP:
4714         case BPF_MAP_TYPE_DEVMAP_HASH:
4715                 if (func_id != BPF_FUNC_redirect_map &&
4716                     func_id != BPF_FUNC_map_lookup_elem)
4717                         goto error;
4718                 break;
4719         /* Restrict bpf side of cpumap and xskmap, open when use-cases
4720          * appear.
4721          */
4722         case BPF_MAP_TYPE_CPUMAP:
4723                 if (func_id != BPF_FUNC_redirect_map)
4724                         goto error;
4725                 break;
4726         case BPF_MAP_TYPE_XSKMAP:
4727                 if (func_id != BPF_FUNC_redirect_map &&
4728                     func_id != BPF_FUNC_map_lookup_elem)
4729                         goto error;
4730                 break;
4731         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4732         case BPF_MAP_TYPE_HASH_OF_MAPS:
4733                 if (func_id != BPF_FUNC_map_lookup_elem)
4734                         goto error;
4735                 break;
4736         case BPF_MAP_TYPE_SOCKMAP:
4737                 if (func_id != BPF_FUNC_sk_redirect_map &&
4738                     func_id != BPF_FUNC_sock_map_update &&
4739                     func_id != BPF_FUNC_map_delete_elem &&
4740                     func_id != BPF_FUNC_msg_redirect_map &&
4741                     func_id != BPF_FUNC_sk_select_reuseport &&
4742                     func_id != BPF_FUNC_map_lookup_elem &&
4743                     !may_update_sockmap(env, func_id))
4744                         goto error;
4745                 break;
4746         case BPF_MAP_TYPE_SOCKHASH:
4747                 if (func_id != BPF_FUNC_sk_redirect_hash &&
4748                     func_id != BPF_FUNC_sock_hash_update &&
4749                     func_id != BPF_FUNC_map_delete_elem &&
4750                     func_id != BPF_FUNC_msg_redirect_hash &&
4751                     func_id != BPF_FUNC_sk_select_reuseport &&
4752                     func_id != BPF_FUNC_map_lookup_elem &&
4753                     !may_update_sockmap(env, func_id))
4754                         goto error;
4755                 break;
4756         case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
4757                 if (func_id != BPF_FUNC_sk_select_reuseport)
4758                         goto error;
4759                 break;
4760         case BPF_MAP_TYPE_QUEUE:
4761         case BPF_MAP_TYPE_STACK:
4762                 if (func_id != BPF_FUNC_map_peek_elem &&
4763                     func_id != BPF_FUNC_map_pop_elem &&
4764                     func_id != BPF_FUNC_map_push_elem)
4765                         goto error;
4766                 break;
4767         case BPF_MAP_TYPE_SK_STORAGE:
4768                 if (func_id != BPF_FUNC_sk_storage_get &&
4769                     func_id != BPF_FUNC_sk_storage_delete)
4770                         goto error;
4771                 break;
4772         case BPF_MAP_TYPE_INODE_STORAGE:
4773                 if (func_id != BPF_FUNC_inode_storage_get &&
4774                     func_id != BPF_FUNC_inode_storage_delete)
4775                         goto error;
4776                 break;
4777         default:
4778                 break;
4779         }
4780
4781         /* ... and second from the function itself. */
4782         switch (func_id) {
4783         case BPF_FUNC_tail_call:
4784                 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
4785                         goto error;
4786                 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
4787                         verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
4788                         return -EINVAL;
4789                 }
4790                 break;
4791         case BPF_FUNC_perf_event_read:
4792         case BPF_FUNC_perf_event_output:
4793         case BPF_FUNC_perf_event_read_value:
4794         case BPF_FUNC_skb_output:
4795         case BPF_FUNC_xdp_output:
4796                 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
4797                         goto error;
4798                 break;
4799         case BPF_FUNC_ringbuf_output:
4800         case BPF_FUNC_ringbuf_reserve:
4801         case BPF_FUNC_ringbuf_query:
4802                 if (map->map_type != BPF_MAP_TYPE_RINGBUF)
4803                         goto error;
4804                 break;
4805         case BPF_FUNC_get_stackid:
4806                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
4807                         goto error;
4808                 break;
4809         case BPF_FUNC_current_task_under_cgroup:
4810         case BPF_FUNC_skb_under_cgroup:
4811                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
4812                         goto error;
4813                 break;
4814         case BPF_FUNC_redirect_map:
4815                 if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
4816                     map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
4817                     map->map_type != BPF_MAP_TYPE_CPUMAP &&
4818                     map->map_type != BPF_MAP_TYPE_XSKMAP)
4819                         goto error;
4820                 break;
4821         case BPF_FUNC_sk_redirect_map:
4822         case BPF_FUNC_msg_redirect_map:
4823         case BPF_FUNC_sock_map_update:
4824                 if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
4825                         goto error;
4826                 break;
4827         case BPF_FUNC_sk_redirect_hash:
4828         case BPF_FUNC_msg_redirect_hash:
4829         case BPF_FUNC_sock_hash_update:
4830                 if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
4831                         goto error;
4832                 break;
4833         case BPF_FUNC_get_local_storage:
4834                 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
4835                     map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
4836                         goto error;
4837                 break;
4838         case BPF_FUNC_sk_select_reuseport:
4839                 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
4840                     map->map_type != BPF_MAP_TYPE_SOCKMAP &&
4841                     map->map_type != BPF_MAP_TYPE_SOCKHASH)
4842                         goto error;
4843                 break;
4844         case BPF_FUNC_map_peek_elem:
4845         case BPF_FUNC_map_pop_elem:
4846         case BPF_FUNC_map_push_elem:
4847                 if (map->map_type != BPF_MAP_TYPE_QUEUE &&
4848                     map->map_type != BPF_MAP_TYPE_STACK)
4849                         goto error;
4850                 break;
4851         case BPF_FUNC_sk_storage_get:
4852         case BPF_FUNC_sk_storage_delete:
4853                 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
4854                         goto error;
4855                 break;
4856         case BPF_FUNC_inode_storage_get:
4857         case BPF_FUNC_inode_storage_delete:
4858                 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
4859                         goto error;
4860                 break;
4861         default:
4862                 break;
4863         }
4864
4865         return 0;
4866 error:
4867         verbose(env, "cannot pass map_type %d into func %s#%d\n",
4868                 map->map_type, func_id_name(func_id), func_id);
4869         return -EINVAL;
4870 }
4871
4872 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
4873 {
4874         int count = 0;
4875
4876         if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
4877                 count++;
4878         if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
4879                 count++;
4880         if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
4881                 count++;
4882         if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
4883                 count++;
4884         if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
4885                 count++;
4886
4887         /* We only support one arg being in raw mode at the moment,
4888          * which is sufficient for the helper functions we have
4889          * right now.
4890          */
4891         return count <= 1;
4892 }
4893
4894 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
4895                                     enum bpf_arg_type arg_next)
4896 {
4897         return (arg_type_is_mem_ptr(arg_curr) &&
4898                 !arg_type_is_mem_size(arg_next)) ||
4899                (!arg_type_is_mem_ptr(arg_curr) &&
4900                 arg_type_is_mem_size(arg_next));
4901 }
4902
4903 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
4904 {
4905         /* bpf_xxx(..., buf, len) call will access 'len'
4906          * bytes from memory 'buf'. Both arg types need
4907          * to be paired, so make sure there's no buggy
4908          * helper function specification.
4909          */
4910         if (arg_type_is_mem_size(fn->arg1_type) ||
4911             arg_type_is_mem_ptr(fn->arg5_type)  ||
4912             check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
4913             check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
4914             check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
4915             check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
4916                 return false;
4917
4918         return true;
4919 }
4920
4921 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
4922 {
4923         int count = 0;
4924
4925         if (arg_type_may_be_refcounted(fn->arg1_type))
4926                 count++;
4927         if (arg_type_may_be_refcounted(fn->arg2_type))
4928                 count++;
4929         if (arg_type_may_be_refcounted(fn->arg3_type))
4930                 count++;
4931         if (arg_type_may_be_refcounted(fn->arg4_type))
4932                 count++;
4933         if (arg_type_may_be_refcounted(fn->arg5_type))
4934                 count++;
4935
4936         /* A reference acquiring function cannot acquire
4937          * another refcounted ptr.
4938          */
4939         if (may_be_acquire_function(func_id) && count)
4940                 return false;
4941
4942         /* We only support one arg being unreferenced at the moment,
4943          * which is sufficient for the helper functions we have right now.
4944          */
4945         return count <= 1;
4946 }
4947
4948 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
4949 {
4950         int i;
4951
4952         for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
4953                 if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
4954                         return false;
4955
4956                 if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
4957                         return false;
4958         }
4959
4960         return true;
4961 }
4962
4963 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
4964 {
4965         return check_raw_mode_ok(fn) &&
4966                check_arg_pair_ok(fn) &&
4967                check_btf_id_ok(fn) &&
4968                check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
4969 }
4970
4971 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
4972  * are now invalid, so turn them into unknown SCALAR_VALUE.
4973  */
4974 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env,
4975                                      struct bpf_func_state *state)
4976 {
4977         struct bpf_reg_state *regs = state->regs, *reg;
4978         int i;
4979
4980         for (i = 0; i < MAX_BPF_REG; i++)
4981                 if (reg_is_pkt_pointer_any(&regs[i]))
4982                         mark_reg_unknown(env, regs, i);
4983
4984         bpf_for_each_spilled_reg(i, state, reg) {
4985                 if (!reg)
4986                         continue;
4987                 if (reg_is_pkt_pointer_any(reg))
4988                         __mark_reg_unknown(env, reg);
4989         }
4990 }
4991
4992 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
4993 {
4994         struct bpf_verifier_state *vstate = env->cur_state;
4995         int i;
4996
4997         for (i = 0; i <= vstate->curframe; i++)
4998                 __clear_all_pkt_pointers(env, vstate->frame[i]);
4999 }
5000
5001 static void release_reg_references(struct bpf_verifier_env *env,
5002                                    struct bpf_func_state *state,
5003                                    int ref_obj_id)
5004 {
5005         struct bpf_reg_state *regs = state->regs, *reg;
5006         int i;
5007
5008         for (i = 0; i < MAX_BPF_REG; i++)
5009                 if (regs[i].ref_obj_id == ref_obj_id)
5010                         mark_reg_unknown(env, regs, i);
5011
5012         bpf_for_each_spilled_reg(i, state, reg) {
5013                 if (!reg)
5014                         continue;
5015                 if (reg->ref_obj_id == ref_obj_id)
5016                         __mark_reg_unknown(env, reg);
5017         }
5018 }
5019
5020 /* The pointer with the specified id has released its reference to kernel
5021  * resources. Identify all copies of the same pointer and clear the reference.
5022  */
5023 static int release_reference(struct bpf_verifier_env *env,
5024                              int ref_obj_id)
5025 {
5026         struct bpf_verifier_state *vstate = env->cur_state;
5027         int err;
5028         int i;
5029
5030         err = release_reference_state(cur_func(env), ref_obj_id);
5031         if (err)
5032                 return err;
5033
5034         for (i = 0; i <= vstate->curframe; i++)
5035                 release_reg_references(env, vstate->frame[i], ref_obj_id);
5036
5037         return 0;
5038 }
5039
5040 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
5041                                     struct bpf_reg_state *regs)
5042 {
5043         int i;
5044
5045         /* after the call registers r0 - r5 were scratched */
5046         for (i = 0; i < CALLER_SAVED_REGS; i++) {
5047                 mark_reg_not_init(env, regs, caller_saved[i]);
5048                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5049         }
5050 }
5051
5052 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
5053                            int *insn_idx)
5054 {
5055         struct bpf_verifier_state *state = env->cur_state;
5056         struct bpf_func_info_aux *func_info_aux;
5057         struct bpf_func_state *caller, *callee;
5058         int i, err, subprog, target_insn;
5059         bool is_global = false;
5060
5061         if (state->curframe + 1 >= MAX_CALL_FRAMES) {
5062                 verbose(env, "the call stack of %d frames is too deep\n",
5063                         state->curframe + 2);
5064                 return -E2BIG;
5065         }
5066
5067         target_insn = *insn_idx + insn->imm;
5068         subprog = find_subprog(env, target_insn + 1);
5069         if (subprog < 0) {
5070                 verbose(env, "verifier bug. No program starts at insn %d\n",
5071                         target_insn + 1);
5072                 return -EFAULT;
5073         }
5074
5075         caller = state->frame[state->curframe];
5076         if (state->frame[state->curframe + 1]) {
5077                 verbose(env, "verifier bug. Frame %d already allocated\n",
5078                         state->curframe + 1);
5079                 return -EFAULT;
5080         }
5081
5082         func_info_aux = env->prog->aux->func_info_aux;
5083         if (func_info_aux)
5084                 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5085         err = btf_check_func_arg_match(env, subprog, caller->regs);
5086         if (err == -EFAULT)
5087                 return err;
5088         if (is_global) {
5089                 if (err) {
5090                         verbose(env, "Caller passes invalid args into func#%d\n",
5091                                 subprog);
5092                         return err;
5093                 } else {
5094                         if (env->log.level & BPF_LOG_LEVEL)
5095                                 verbose(env,
5096                                         "Func#%d is global and valid. Skipping.\n",
5097                                         subprog);
5098                         clear_caller_saved_regs(env, caller->regs);
5099
5100                         /* All global functions return a 64-bit SCALAR_VALUE */
5101                         mark_reg_unknown(env, caller->regs, BPF_REG_0);
5102                         caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5103
5104                         /* continue with next insn after call */
5105                         return 0;
5106                 }
5107         }
5108
5109         callee = kzalloc(sizeof(*callee), GFP_KERNEL);
5110         if (!callee)
5111                 return -ENOMEM;
5112         state->frame[state->curframe + 1] = callee;
5113
5114         /* callee cannot access r0, r6 - r9 for reading and has to write
5115          * into its own stack before reading from it.
5116          * callee can read/write into caller's stack
5117          */
5118         init_func_state(env, callee,
5119                         /* remember the callsite, it will be used by bpf_exit */
5120                         *insn_idx /* callsite */,
5121                         state->curframe + 1 /* frameno within this callchain */,
5122                         subprog /* subprog number within this prog */);
5123
5124         /* Transfer references to the callee */
5125         err = transfer_reference_state(callee, caller);
5126         if (err)
5127                 return err;
5128
5129         /* copy r1 - r5 args that callee can access.  The copy includes parent
5130          * pointers, which connects us up to the liveness chain
5131          */
5132         for (i = BPF_REG_1; i <= BPF_REG_5; i++)
5133                 callee->regs[i] = caller->regs[i];
5134
5135         clear_caller_saved_regs(env, caller->regs);
5136
5137         /* only increment it after check_reg_arg() finished */
5138         state->curframe++;
5139
5140         /* and go analyze first insn of the callee */
5141         *insn_idx = target_insn;
5142
5143         if (env->log.level & BPF_LOG_LEVEL) {
5144                 verbose(env, "caller:\n");
5145                 print_verifier_state(env, caller);
5146                 verbose(env, "callee:\n");
5147                 print_verifier_state(env, callee);
5148         }
5149         return 0;
5150 }
5151
5152 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
5153 {
5154         struct bpf_verifier_state *state = env->cur_state;
5155         struct bpf_func_state *caller, *callee;
5156         struct bpf_reg_state *r0;
5157         int err;
5158
5159         callee = state->frame[state->curframe];
5160         r0 = &callee->regs[BPF_REG_0];
5161         if (r0->type == PTR_TO_STACK) {
5162                 /* technically it's ok to return caller's stack pointer
5163                  * (or caller's caller's pointer) back to the caller,
5164                  * since these pointers are valid. Only current stack
5165                  * pointer will be invalid as soon as function exits,
5166                  * but let's be conservative
5167                  */
5168                 verbose(env, "cannot return stack pointer to the caller\n");
5169                 return -EINVAL;
5170         }
5171
5172         state->curframe--;
5173         caller = state->frame[state->curframe];
5174         /* return to the caller whatever r0 had in the callee */
5175         caller->regs[BPF_REG_0] = *r0;
5176
5177         /* Transfer references to the caller */
5178         err = transfer_reference_state(caller, callee);
5179         if (err)
5180                 return err;
5181
5182         *insn_idx = callee->callsite + 1;
5183         if (env->log.level & BPF_LOG_LEVEL) {
5184                 verbose(env, "returning from callee:\n");
5185                 print_verifier_state(env, callee);
5186                 verbose(env, "to caller at %d:\n", *insn_idx);
5187                 print_verifier_state(env, caller);
5188         }
5189         /* clear everything in the callee */
5190         free_func_state(callee);
5191         state->frame[state->curframe + 1] = NULL;
5192         return 0;
5193 }
5194
5195 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
5196                                    int func_id,
5197                                    struct bpf_call_arg_meta *meta)
5198 {
5199         struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
5200
5201         if (ret_type != RET_INTEGER ||
5202             (func_id != BPF_FUNC_get_stack &&
5203              func_id != BPF_FUNC_probe_read_str &&
5204              func_id != BPF_FUNC_probe_read_kernel_str &&
5205              func_id != BPF_FUNC_probe_read_user_str))
5206                 return;
5207
5208         ret_reg->smax_value = meta->msize_max_value;
5209         ret_reg->s32_max_value = meta->msize_max_value;
5210         ret_reg->smin_value = -MAX_ERRNO;
5211         ret_reg->s32_min_value = -MAX_ERRNO;
5212         __reg_deduce_bounds(ret_reg);
5213         __reg_bound_offset(ret_reg);
5214         __update_reg_bounds(ret_reg);
5215 }
5216
5217 static int
5218 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
5219                 int func_id, int insn_idx)
5220 {
5221         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5222         struct bpf_map *map = meta->map_ptr;
5223
5224         if (func_id != BPF_FUNC_tail_call &&
5225             func_id != BPF_FUNC_map_lookup_elem &&
5226             func_id != BPF_FUNC_map_update_elem &&
5227             func_id != BPF_FUNC_map_delete_elem &&
5228             func_id != BPF_FUNC_map_push_elem &&
5229             func_id != BPF_FUNC_map_pop_elem &&
5230             func_id != BPF_FUNC_map_peek_elem)
5231                 return 0;
5232
5233         if (map == NULL) {
5234                 verbose(env, "kernel subsystem misconfigured verifier\n");
5235                 return -EINVAL;
5236         }
5237
5238         /* In case of read-only, some additional restrictions
5239          * need to be applied in order to prevent altering the
5240          * state of the map from program side.
5241          */
5242         if ((map->map_flags & BPF_F_RDONLY_PROG) &&
5243             (func_id == BPF_FUNC_map_delete_elem ||
5244              func_id == BPF_FUNC_map_update_elem ||
5245              func_id == BPF_FUNC_map_push_elem ||
5246              func_id == BPF_FUNC_map_pop_elem)) {
5247                 verbose(env, "write into map forbidden\n");
5248                 return -EACCES;
5249         }
5250
5251         if (!BPF_MAP_PTR(aux->map_ptr_state))
5252                 bpf_map_ptr_store(aux, meta->map_ptr,
5253                                   !meta->map_ptr->bypass_spec_v1);
5254         else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
5255                 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
5256                                   !meta->map_ptr->bypass_spec_v1);
5257         return 0;
5258 }
5259
5260 static int
5261 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
5262                 int func_id, int insn_idx)
5263 {
5264         struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5265         struct bpf_reg_state *regs = cur_regs(env), *reg;
5266         struct bpf_map *map = meta->map_ptr;
5267         struct tnum range;
5268         u64 val;
5269         int err;
5270
5271         if (func_id != BPF_FUNC_tail_call)
5272                 return 0;
5273         if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
5274                 verbose(env, "kernel subsystem misconfigured verifier\n");
5275                 return -EINVAL;
5276         }
5277
5278         range = tnum_range(0, map->max_entries - 1);
5279         reg = &regs[BPF_REG_3];
5280
5281         if (!register_is_const(reg) || !tnum_in(range, reg->var_off)) {
5282                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5283                 return 0;
5284         }
5285
5286         err = mark_chain_precision(env, BPF_REG_3);
5287         if (err)
5288                 return err;
5289
5290         val = reg->var_off.value;
5291         if (bpf_map_key_unseen(aux))
5292                 bpf_map_key_store(aux, val);
5293         else if (!bpf_map_key_poisoned(aux) &&
5294                   bpf_map_key_immediate(aux) != val)
5295                 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5296         return 0;
5297 }
5298
5299 static int check_reference_leak(struct bpf_verifier_env *env)
5300 {
5301         struct bpf_func_state *state = cur_func(env);
5302         int i;
5303
5304         for (i = 0; i < state->acquired_refs; i++) {
5305                 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
5306                         state->refs[i].id, state->refs[i].insn_idx);
5307         }
5308         return state->acquired_refs ? -EINVAL : 0;
5309 }
5310
5311 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
5312 {
5313         const struct bpf_func_proto *fn = NULL;
5314         struct bpf_reg_state *regs;
5315         struct bpf_call_arg_meta meta;
5316         bool changes_data;
5317         int i, err;
5318
5319         /* find function prototype */
5320         if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
5321                 verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
5322                         func_id);
5323                 return -EINVAL;
5324         }
5325
5326         if (env->ops->get_func_proto)
5327                 fn = env->ops->get_func_proto(func_id, env->prog);
5328         if (!fn) {
5329                 verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
5330                         func_id);
5331                 return -EINVAL;
5332         }
5333
5334         /* eBPF programs must be GPL compatible to use GPL-ed functions */
5335         if (!env->prog->gpl_compatible && fn->gpl_only) {
5336                 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
5337                 return -EINVAL;
5338         }
5339
5340         if (fn->allowed && !fn->allowed(env->prog)) {
5341                 verbose(env, "helper call is not allowed in probe\n");
5342                 return -EINVAL;
5343         }
5344
5345         /* With LD_ABS/IND some JITs save/restore skb from r1. */
5346         changes_data = bpf_helper_changes_pkt_data(fn->func);
5347         if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
5348                 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
5349                         func_id_name(func_id), func_id);
5350                 return -EINVAL;
5351         }
5352
5353         memset(&meta, 0, sizeof(meta));
5354         meta.pkt_access = fn->pkt_access;
5355
5356         err = check_func_proto(fn, func_id);
5357         if (err) {
5358                 verbose(env, "kernel subsystem misconfigured func %s#%d\n",
5359                         func_id_name(func_id), func_id);
5360                 return err;
5361         }
5362
5363         meta.func_id = func_id;
5364         /* check args */
5365         for (i = 0; i < 5; i++) {
5366                 err = check_func_arg(env, i, &meta, fn);
5367                 if (err)
5368                         return err;
5369         }
5370
5371         err = record_func_map(env, &meta, func_id, insn_idx);
5372         if (err)
5373                 return err;
5374
5375         err = record_func_key(env, &meta, func_id, insn_idx);
5376         if (err)
5377                 return err;
5378
5379         /* Mark slots with STACK_MISC in case of raw mode, stack offset
5380          * is inferred from register state.
5381          */
5382         for (i = 0; i < meta.access_size; i++) {
5383                 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
5384                                        BPF_WRITE, -1, false);
5385                 if (err)
5386                         return err;
5387         }
5388
5389         if (func_id == BPF_FUNC_tail_call) {
5390                 err = check_reference_leak(env);
5391                 if (err) {
5392                         verbose(env, "tail_call would lead to reference leak\n");
5393                         return err;
5394                 }
5395         } else if (is_release_function(func_id)) {
5396                 err = release_reference(env, meta.ref_obj_id);
5397                 if (err) {
5398                         verbose(env, "func %s#%d reference has not been acquired before\n",
5399                                 func_id_name(func_id), func_id);
5400                         return err;
5401                 }
5402         }
5403
5404         regs = cur_regs(env);
5405
5406         /* check that flags argument in get_local_storage(map, flags) is 0,
5407          * this is required because get_local_storage() can't return an error.
5408          */
5409         if (func_id == BPF_FUNC_get_local_storage &&
5410             !register_is_null(&regs[BPF_REG_2])) {
5411                 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
5412                 return -EINVAL;
5413         }
5414
5415         /* reset caller saved regs */
5416         for (i = 0; i < CALLER_SAVED_REGS; i++) {
5417                 mark_reg_not_init(env, regs, caller_saved[i]);
5418                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5419         }
5420
5421         /* helper call returns 64-bit value. */
5422         regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5423
5424         /* update return register (already marked as written above) */
5425         if (fn->ret_type == RET_INTEGER) {
5426                 /* sets type to SCALAR_VALUE */
5427                 mark_reg_unknown(env, regs, BPF_REG_0);
5428         } else if (fn->ret_type == RET_VOID) {
5429                 regs[BPF_REG_0].type = NOT_INIT;
5430         } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
5431                    fn->ret_type == RET_PTR_TO_MAP_VALUE) {
5432                 /* There is no offset yet applied, variable or fixed */
5433                 mark_reg_known_zero(env, regs, BPF_REG_0);
5434                 /* remember map_ptr, so that check_map_access()
5435                  * can check 'value_size' boundary of memory access
5436                  * to map element returned from bpf_map_lookup_elem()
5437                  */
5438                 if (meta.map_ptr == NULL) {
5439                         verbose(env,
5440                                 "kernel subsystem misconfigured verifier\n");
5441                         return -EINVAL;
5442                 }
5443                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
5444                 if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
5445                         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
5446                         if (map_value_has_spin_lock(meta.map_ptr))
5447                                 regs[BPF_REG_0].id = ++env->id_gen;
5448                 } else {
5449                         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
5450                 }
5451         } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
5452                 mark_reg_known_zero(env, regs, BPF_REG_0);
5453                 regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
5454         } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
5455                 mark_reg_known_zero(env, regs, BPF_REG_0);
5456                 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
5457         } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
5458                 mark_reg_known_zero(env, regs, BPF_REG_0);
5459                 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
5460         } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
5461                 mark_reg_known_zero(env, regs, BPF_REG_0);
5462                 regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
5463                 regs[BPF_REG_0].mem_size = meta.mem_size;
5464         } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
5465                    fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
5466                 const struct btf_type *t;
5467
5468                 mark_reg_known_zero(env, regs, BPF_REG_0);
5469                 t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
5470                 if (!btf_type_is_struct(t)) {
5471                         u32 tsize;
5472                         const struct btf_type *ret;
5473                         const char *tname;
5474
5475                         /* resolve the type size of ksym. */
5476                         ret = btf_resolve_size(btf_vmlinux, t, &tsize);
5477                         if (IS_ERR(ret)) {
5478                                 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5479                                 verbose(env, "unable to resolve the size of type '%s': %ld\n",
5480                                         tname, PTR_ERR(ret));
5481                                 return -EINVAL;
5482                         }
5483                         regs[BPF_REG_0].type =
5484                                 fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
5485                                 PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
5486                         regs[BPF_REG_0].mem_size = tsize;
5487                 } else {
5488                         regs[BPF_REG_0].type =
5489                                 fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
5490                                 PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
5491                         regs[BPF_REG_0].btf_id = meta.ret_btf_id;
5492                 }
5493         } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
5494                 int ret_btf_id;
5495
5496                 mark_reg_known_zero(env, regs, BPF_REG_0);
5497                 regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
5498                 ret_btf_id = *fn->ret_btf_id;
5499                 if (ret_btf_id == 0) {
5500                         verbose(env, "invalid return type %d of func %s#%d\n",
5501                                 fn->ret_type, func_id_name(func_id), func_id);
5502                         return -EINVAL;
5503                 }
5504                 regs[BPF_REG_0].btf_id = ret_btf_id;
5505         } else {
5506                 verbose(env, "unknown return type %d of func %s#%d\n",
5507                         fn->ret_type, func_id_name(func_id), func_id);
5508                 return -EINVAL;
5509         }
5510
5511         if (reg_type_may_be_null(regs[BPF_REG_0].type))
5512                 regs[BPF_REG_0].id = ++env->id_gen;
5513
5514         if (is_ptr_cast_function(func_id)) {
5515                 /* For release_reference() */
5516                 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
5517         } else if (is_acquire_function(func_id, meta.map_ptr)) {
5518                 int id = acquire_reference_state(env, insn_idx);
5519
5520                 if (id < 0)
5521                         return id;
5522                 /* For mark_ptr_or_null_reg() */
5523                 regs[BPF_REG_0].id = id;
5524                 /* For release_reference() */
5525                 regs[BPF_REG_0].ref_obj_id = id;
5526         }
5527
5528         do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
5529
5530         err = check_map_func_compatibility(env, meta.map_ptr, func_id);
5531         if (err)
5532                 return err;
5533
5534         if ((func_id == BPF_FUNC_get_stack ||
5535              func_id == BPF_FUNC_get_task_stack) &&
5536             !env->prog->has_callchain_buf) {
5537                 const char *err_str;
5538
5539 #ifdef CONFIG_PERF_EVENTS
5540                 err = get_callchain_buffers(sysctl_perf_event_max_stack);
5541                 err_str = "cannot get callchain buffer for func %s#%d\n";
5542 #else
5543                 err = -ENOTSUPP;
5544                 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
5545 #endif
5546                 if (err) {
5547                         verbose(env, err_str, func_id_name(func_id), func_id);
5548                         return err;
5549                 }
5550
5551                 env->prog->has_callchain_buf = true;
5552         }
5553
5554         if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
5555                 env->prog->call_get_stack = true;
5556
5557         if (changes_data)
5558                 clear_all_pkt_pointers(env);
5559         return 0;
5560 }
5561
5562 static bool signed_add_overflows(s64 a, s64 b)
5563 {
5564         /* Do the add in u64, where overflow is well-defined */
5565         s64 res = (s64)((u64)a + (u64)b);
5566
5567         if (b < 0)
5568                 return res > a;
5569         return res < a;
5570 }
5571
5572 static bool signed_add32_overflows(s32 a, s32 b)
5573 {
5574         /* Do the add in u32, where overflow is well-defined */
5575         s32 res = (s32)((u32)a + (u32)b);
5576
5577         if (b < 0)
5578                 return res > a;
5579         return res < a;
5580 }
5581
5582 static bool signed_sub_overflows(s64 a, s64 b)
5583 {
5584         /* Do the sub in u64, where overflow is well-defined */
5585         s64 res = (s64)((u64)a - (u64)b);
5586
5587         if (b < 0)
5588                 return res < a;
5589         return res > a;
5590 }
5591
5592 static bool signed_sub32_overflows(s32 a, s32 b)
5593 {
5594         /* Do the sub in u32, where overflow is well-defined */
5595         s32 res = (s32)((u32)a - (u32)b);
5596
5597         if (b < 0)
5598                 return res < a;
5599         return res > a;
5600 }
5601
5602 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
5603                                   const struct bpf_reg_state *reg,
5604                                   enum bpf_reg_type type)
5605 {
5606         bool known = tnum_is_const(reg->var_off);
5607         s64 val = reg->var_off.value;
5608         s64 smin = reg->smin_value;
5609
5610         if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
5611                 verbose(env, "math between %s pointer and %lld is not allowed\n",
5612                         reg_type_str[type], val);
5613                 return false;
5614         }
5615
5616         if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
5617                 verbose(env, "%s pointer offset %d is not allowed\n",
5618                         reg_type_str[type], reg->off);
5619                 return false;
5620         }
5621
5622         if (smin == S64_MIN) {
5623                 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
5624                         reg_type_str[type]);
5625                 return false;
5626         }
5627
5628         if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
5629                 verbose(env, "value %lld makes %s pointer be out of bounds\n",
5630                         smin, reg_type_str[type]);
5631                 return false;
5632         }
5633
5634         return true;
5635 }
5636
5637 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
5638 {
5639         return &env->insn_aux_data[env->insn_idx];
5640 }
5641
5642 enum {
5643         REASON_BOUNDS   = -1,
5644         REASON_TYPE     = -2,
5645         REASON_PATHS    = -3,
5646         REASON_LIMIT    = -4,
5647         REASON_STACK    = -5,
5648 };
5649
5650 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
5651                               u32 *alu_limit, bool mask_to_left)
5652 {
5653         u32 max = 0, ptr_limit = 0;
5654
5655         switch (ptr_reg->type) {
5656         case PTR_TO_STACK:
5657                 /* Offset 0 is out-of-bounds, but acceptable start for the
5658                  * left direction, see BPF_REG_FP. Also, unknown scalar
5659                  * offset where we would need to deal with min/max bounds is
5660                  * currently prohibited for unprivileged.
5661                  */
5662                 max = MAX_BPF_STACK + mask_to_left;
5663                 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
5664                 break;
5665         case PTR_TO_MAP_VALUE:
5666                 max = ptr_reg->map_ptr->value_size;
5667                 ptr_limit = (mask_to_left ?
5668                              ptr_reg->smin_value :
5669                              ptr_reg->umax_value) + ptr_reg->off;
5670                 break;
5671         default:
5672                 return REASON_TYPE;
5673         }
5674
5675         if (ptr_limit >= max)
5676                 return REASON_LIMIT;
5677         *alu_limit = ptr_limit;
5678         return 0;
5679 }
5680
5681 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
5682                                     const struct bpf_insn *insn)
5683 {
5684         return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
5685 }
5686
5687 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
5688                                        u32 alu_state, u32 alu_limit)
5689 {
5690         /* If we arrived here from different branches with different
5691          * state or limits to sanitize, then this won't work.
5692          */
5693         if (aux->alu_state &&
5694             (aux->alu_state != alu_state ||
5695              aux->alu_limit != alu_limit))
5696                 return REASON_PATHS;
5697
5698         /* Corresponding fixup done in fixup_bpf_calls(). */
5699         aux->alu_state = alu_state;
5700         aux->alu_limit = alu_limit;
5701         return 0;
5702 }
5703
5704 static int sanitize_val_alu(struct bpf_verifier_env *env,
5705                             struct bpf_insn *insn)
5706 {
5707         struct bpf_insn_aux_data *aux = cur_aux(env);
5708
5709         if (can_skip_alu_sanitation(env, insn))
5710                 return 0;
5711
5712         return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
5713 }
5714
5715 static bool sanitize_needed(u8 opcode)
5716 {
5717         return opcode == BPF_ADD || opcode == BPF_SUB;
5718 }
5719
5720 struct bpf_sanitize_info {
5721         struct bpf_insn_aux_data aux;
5722         bool mask_to_left;
5723 };
5724
5725 static struct bpf_verifier_state *
5726 sanitize_speculative_path(struct bpf_verifier_env *env,
5727                           const struct bpf_insn *insn,
5728                           u32 next_idx, u32 curr_idx)
5729 {
5730         struct bpf_verifier_state *branch;
5731         struct bpf_reg_state *regs;
5732
5733         branch = push_stack(env, next_idx, curr_idx, true);
5734         if (branch && insn) {
5735                 regs = branch->frame[branch->curframe]->regs;
5736                 if (BPF_SRC(insn->code) == BPF_K) {
5737                         mark_reg_unknown(env, regs, insn->dst_reg);
5738                 } else if (BPF_SRC(insn->code) == BPF_X) {
5739                         mark_reg_unknown(env, regs, insn->dst_reg);
5740                         mark_reg_unknown(env, regs, insn->src_reg);
5741                 }
5742         }
5743         return branch;
5744 }
5745
5746 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
5747                             struct bpf_insn *insn,
5748                             const struct bpf_reg_state *ptr_reg,
5749                             const struct bpf_reg_state *off_reg,
5750                             struct bpf_reg_state *dst_reg,
5751                             struct bpf_sanitize_info *info,
5752                             const bool commit_window)
5753 {
5754         struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
5755         struct bpf_verifier_state *vstate = env->cur_state;
5756         bool off_is_imm = tnum_is_const(off_reg->var_off);
5757         bool off_is_neg = off_reg->smin_value < 0;
5758         bool ptr_is_dst_reg = ptr_reg == dst_reg;
5759         u8 opcode = BPF_OP(insn->code);
5760         u32 alu_state, alu_limit;
5761         struct bpf_reg_state tmp;
5762         bool ret;
5763         int err;
5764
5765         if (can_skip_alu_sanitation(env, insn))
5766                 return 0;
5767
5768         /* We already marked aux for masking from non-speculative
5769          * paths, thus we got here in the first place. We only care
5770          * to explore bad access from here.
5771          */
5772         if (vstate->speculative)
5773                 goto do_sim;
5774
5775         if (!commit_window) {
5776                 if (!tnum_is_const(off_reg->var_off) &&
5777                     (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
5778                         return REASON_BOUNDS;
5779
5780                 info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
5781                                      (opcode == BPF_SUB && !off_is_neg);
5782         }
5783
5784         err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
5785         if (err < 0)
5786                 return err;
5787
5788         if (commit_window) {
5789                 /* In commit phase we narrow the masking window based on
5790                  * the observed pointer move after the simulated operation.
5791                  */
5792                 alu_state = info->aux.alu_state;
5793                 alu_limit = abs(info->aux.alu_limit - alu_limit);
5794         } else {
5795                 alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
5796                 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
5797                 alu_state |= ptr_is_dst_reg ?
5798                              BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
5799
5800                 /* Limit pruning on unknown scalars to enable deep search for
5801                  * potential masking differences from other program paths.
5802                  */
5803                 if (!off_is_imm)
5804                         env->explore_alu_limits = true;
5805         }
5806
5807         err = update_alu_sanitation_state(aux, alu_state, alu_limit);
5808         if (err < 0)
5809                 return err;
5810 do_sim:
5811         /* If we're in commit phase, we're done here given we already
5812          * pushed the truncated dst_reg into the speculative verification
5813          * stack.
5814          *
5815          * Also, when register is a known constant, we rewrite register-based
5816          * operation to immediate-based, and thus do not need masking (and as
5817          * a consequence, do not need to simulate the zero-truncation either).
5818          */
5819         if (commit_window || off_is_imm)
5820                 return 0;
5821
5822         /* Simulate and find potential out-of-bounds access under
5823          * speculative execution from truncation as a result of
5824          * masking when off was not within expected range. If off
5825          * sits in dst, then we temporarily need to move ptr there
5826          * to simulate dst (== 0) +/-= ptr. Needed, for example,
5827          * for cases where we use K-based arithmetic in one direction
5828          * and truncated reg-based in the other in order to explore
5829          * bad access.
5830          */
5831         if (!ptr_is_dst_reg) {
5832                 tmp = *dst_reg;
5833                 *dst_reg = *ptr_reg;
5834         }
5835         ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
5836                                         env->insn_idx);
5837         if (!ptr_is_dst_reg && ret)
5838                 *dst_reg = tmp;
5839         return !ret ? REASON_STACK : 0;
5840 }
5841
5842 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
5843 {
5844         struct bpf_verifier_state *vstate = env->cur_state;
5845
5846         /* If we simulate paths under speculation, we don't update the
5847          * insn as 'seen' such that when we verify unreachable paths in
5848          * the non-speculative domain, sanitize_dead_code() can still
5849          * rewrite/sanitize them.
5850          */
5851         if (!vstate->speculative)
5852                 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
5853 }
5854
5855 static int sanitize_err(struct bpf_verifier_env *env,
5856                         const struct bpf_insn *insn, int reason,
5857                         const struct bpf_reg_state *off_reg,
5858                         const struct bpf_reg_state *dst_reg)
5859 {
5860         static const char *err = "pointer arithmetic with it prohibited for !root";
5861         const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
5862         u32 dst = insn->dst_reg, src = insn->src_reg;
5863
5864         switch (reason) {
5865         case REASON_BOUNDS:
5866                 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
5867                         off_reg == dst_reg ? dst : src, err);
5868                 break;
5869         case REASON_TYPE:
5870                 verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
5871                         off_reg == dst_reg ? src : dst, err);
5872                 break;
5873         case REASON_PATHS:
5874                 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
5875                         dst, op, err);
5876                 break;
5877         case REASON_LIMIT:
5878                 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
5879                         dst, op, err);
5880                 break;
5881         case REASON_STACK:
5882                 verbose(env, "R%d could not be pushed for speculative verification, %s\n",
5883                         dst, err);
5884                 break;
5885         default:
5886                 verbose(env, "verifier internal error: unknown reason (%d)\n",
5887                         reason);
5888                 break;
5889         }
5890
5891         return -EACCES;
5892 }
5893
5894 /* check that stack access falls within stack limits and that 'reg' doesn't
5895  * have a variable offset.
5896  *
5897  * Variable offset is prohibited for unprivileged mode for simplicity since it
5898  * requires corresponding support in Spectre masking for stack ALU.  See also
5899  * retrieve_ptr_limit().
5900  *
5901  *
5902  * 'off' includes 'reg->off'.
5903  */
5904 static int check_stack_access_for_ptr_arithmetic(
5905                                 struct bpf_verifier_env *env,
5906                                 int regno,
5907                                 const struct bpf_reg_state *reg,
5908                                 int off)
5909 {
5910         if (!tnum_is_const(reg->var_off)) {
5911                 char tn_buf[48];
5912
5913                 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5914                 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
5915                         regno, tn_buf, off);
5916                 return -EACCES;
5917         }
5918
5919         if (off >= 0 || off < -MAX_BPF_STACK) {
5920                 verbose(env, "R%d stack pointer arithmetic goes out of range, "
5921                         "prohibited for !root; off=%d\n", regno, off);
5922                 return -EACCES;
5923         }
5924
5925         return 0;
5926 }
5927
5928 static int sanitize_check_bounds(struct bpf_verifier_env *env,
5929                                  const struct bpf_insn *insn,
5930                                  const struct bpf_reg_state *dst_reg)
5931 {
5932         u32 dst = insn->dst_reg;
5933
5934         /* For unprivileged we require that resulting offset must be in bounds
5935          * in order to be able to sanitize access later on.
5936          */
5937         if (env->bypass_spec_v1)
5938                 return 0;
5939
5940         switch (dst_reg->type) {
5941         case PTR_TO_STACK:
5942                 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
5943                                         dst_reg->off + dst_reg->var_off.value))
5944                         return -EACCES;
5945                 break;
5946         case PTR_TO_MAP_VALUE:
5947                 if (check_map_access(env, dst, dst_reg->off, 1, false)) {
5948                         verbose(env, "R%d pointer arithmetic of map value goes out of range, "
5949                                 "prohibited for !root\n", dst);
5950                         return -EACCES;
5951                 }
5952                 break;
5953         default:
5954                 break;
5955         }
5956
5957         return 0;
5958 }
5959
5960 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
5961  * Caller should also handle BPF_MOV case separately.
5962  * If we return -EACCES, caller may want to try again treating pointer as a
5963  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
5964  */
5965 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
5966                                    struct bpf_insn *insn,
5967                                    const struct bpf_reg_state *ptr_reg,
5968                                    const struct bpf_reg_state *off_reg)
5969 {
5970         struct bpf_verifier_state *vstate = env->cur_state;
5971         struct bpf_func_state *state = vstate->frame[vstate->curframe];
5972         struct bpf_reg_state *regs = state->regs, *dst_reg;
5973         bool known = tnum_is_const(off_reg->var_off);
5974         s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
5975             smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
5976         u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
5977             umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
5978         struct bpf_sanitize_info info = {};
5979         u8 opcode = BPF_OP(insn->code);
5980         u32 dst = insn->dst_reg;
5981         int ret;
5982
5983         dst_reg = &regs[dst];
5984
5985         if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
5986             smin_val > smax_val || umin_val > umax_val) {
5987                 /* Taint dst register if offset had invalid bounds derived from
5988                  * e.g. dead branches.
5989                  */
5990                 __mark_reg_unknown(env, dst_reg);
5991                 return 0;
5992         }
5993
5994         if (BPF_CLASS(insn->code) != BPF_ALU64) {
5995                 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
5996                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
5997                         __mark_reg_unknown(env, dst_reg);
5998                         return 0;
5999                 }
6000
6001                 verbose(env,
6002                         "R%d 32-bit pointer arithmetic prohibited\n",
6003                         dst);
6004                 return -EACCES;
6005         }
6006
6007         switch (ptr_reg->type) {
6008         case PTR_TO_MAP_VALUE_OR_NULL:
6009                 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
6010                         dst, reg_type_str[ptr_reg->type]);
6011                 return -EACCES;
6012         case CONST_PTR_TO_MAP:
6013                 /* smin_val represents the known value */
6014                 if (known && smin_val == 0 && opcode == BPF_ADD)
6015                         break;
6016                 fallthrough;
6017         case PTR_TO_PACKET_END:
6018         case PTR_TO_SOCKET:
6019         case PTR_TO_SOCKET_OR_NULL:
6020         case PTR_TO_SOCK_COMMON:
6021         case PTR_TO_SOCK_COMMON_OR_NULL:
6022         case PTR_TO_TCP_SOCK:
6023         case PTR_TO_TCP_SOCK_OR_NULL:
6024         case PTR_TO_XDP_SOCK:
6025                 verbose(env, "R%d pointer arithmetic on %s prohibited\n",
6026                         dst, reg_type_str[ptr_reg->type]);
6027                 return -EACCES;
6028         default:
6029                 break;
6030         }
6031
6032         /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
6033          * The id may be overwritten later if we create a new variable offset.
6034          */
6035         dst_reg->type = ptr_reg->type;
6036         dst_reg->id = ptr_reg->id;
6037
6038         if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
6039             !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
6040                 return -EINVAL;
6041
6042         /* pointer types do not carry 32-bit bounds at the moment. */
6043         __mark_reg32_unbounded(dst_reg);
6044
6045         if (sanitize_needed(opcode)) {
6046                 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
6047                                        &info, false);
6048                 if (ret < 0)
6049                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
6050         }
6051
6052         switch (opcode) {
6053         case BPF_ADD:
6054                 /* We can take a fixed offset as long as it doesn't overflow
6055                  * the s32 'off' field
6056                  */
6057                 if (known && (ptr_reg->off + smin_val ==
6058                               (s64)(s32)(ptr_reg->off + smin_val))) {
6059                         /* pointer += K.  Accumulate it into fixed offset */
6060                         dst_reg->smin_value = smin_ptr;
6061                         dst_reg->smax_value = smax_ptr;
6062                         dst_reg->umin_value = umin_ptr;
6063                         dst_reg->umax_value = umax_ptr;
6064                         dst_reg->var_off = ptr_reg->var_off;
6065                         dst_reg->off = ptr_reg->off + smin_val;
6066                         dst_reg->raw = ptr_reg->raw;
6067                         break;
6068                 }
6069                 /* A new variable offset is created.  Note that off_reg->off
6070                  * == 0, since it's a scalar.
6071                  * dst_reg gets the pointer type and since some positive
6072                  * integer value was added to the pointer, give it a new 'id'
6073                  * if it's a PTR_TO_PACKET.
6074                  * this creates a new 'base' pointer, off_reg (variable) gets
6075                  * added into the variable offset, and we copy the fixed offset
6076                  * from ptr_reg.
6077                  */
6078                 if (signed_add_overflows(smin_ptr, smin_val) ||
6079                     signed_add_overflows(smax_ptr, smax_val)) {
6080                         dst_reg->smin_value = S64_MIN;
6081                         dst_reg->smax_value = S64_MAX;
6082                 } else {
6083                         dst_reg->smin_value = smin_ptr + smin_val;
6084                         dst_reg->smax_value = smax_ptr + smax_val;
6085                 }
6086                 if (umin_ptr + umin_val < umin_ptr ||
6087                     umax_ptr + umax_val < umax_ptr) {
6088                         dst_reg->umin_value = 0;
6089                         dst_reg->umax_value = U64_MAX;
6090                 } else {
6091                         dst_reg->umin_value = umin_ptr + umin_val;
6092                         dst_reg->umax_value = umax_ptr + umax_val;
6093                 }
6094                 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
6095                 dst_reg->off = ptr_reg->off;
6096                 dst_reg->raw = ptr_reg->raw;
6097                 if (reg_is_pkt_pointer(ptr_reg)) {
6098                         dst_reg->id = ++env->id_gen;
6099                         /* something was added to pkt_ptr, set range to zero */
6100                         dst_reg->raw = 0;
6101                 }
6102                 break;
6103         case BPF_SUB:
6104                 if (dst_reg == off_reg) {
6105                         /* scalar -= pointer.  Creates an unknown scalar */
6106                         verbose(env, "R%d tried to subtract pointer from scalar\n",
6107                                 dst);
6108                         return -EACCES;
6109                 }
6110                 /* We don't allow subtraction from FP, because (according to
6111                  * test_verifier.c test "invalid fp arithmetic", JITs might not
6112                  * be able to deal with it.
6113                  */
6114                 if (ptr_reg->type == PTR_TO_STACK) {
6115                         verbose(env, "R%d subtraction from stack pointer prohibited\n",
6116                                 dst);
6117                         return -EACCES;
6118                 }
6119                 if (known && (ptr_reg->off - smin_val ==
6120                               (s64)(s32)(ptr_reg->off - smin_val))) {
6121                         /* pointer -= K.  Subtract it from fixed offset */
6122                         dst_reg->smin_value = smin_ptr;
6123                         dst_reg->smax_value = smax_ptr;
6124                         dst_reg->umin_value = umin_ptr;
6125                         dst_reg->umax_value = umax_ptr;
6126                         dst_reg->var_off = ptr_reg->var_off;
6127                         dst_reg->id = ptr_reg->id;
6128                         dst_reg->off = ptr_reg->off - smin_val;
6129                         dst_reg->raw = ptr_reg->raw;
6130                         break;
6131                 }
6132                 /* A new variable offset is created.  If the subtrahend is known
6133                  * nonnegative, then any reg->range we had before is still good.
6134                  */
6135                 if (signed_sub_overflows(smin_ptr, smax_val) ||
6136                     signed_sub_overflows(smax_ptr, smin_val)) {
6137                         /* Overflow possible, we know nothing */
6138                         dst_reg->smin_value = S64_MIN;
6139                         dst_reg->smax_value = S64_MAX;
6140                 } else {
6141                         dst_reg->smin_value = smin_ptr - smax_val;
6142                         dst_reg->smax_value = smax_ptr - smin_val;
6143                 }
6144                 if (umin_ptr < umax_val) {
6145                         /* Overflow possible, we know nothing */
6146                         dst_reg->umin_value = 0;
6147                         dst_reg->umax_value = U64_MAX;
6148                 } else {
6149                         /* Cannot overflow (as long as bounds are consistent) */
6150                         dst_reg->umin_value = umin_ptr - umax_val;
6151                         dst_reg->umax_value = umax_ptr - umin_val;
6152                 }
6153                 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
6154                 dst_reg->off = ptr_reg->off;
6155                 dst_reg->raw = ptr_reg->raw;
6156                 if (reg_is_pkt_pointer(ptr_reg)) {
6157                         dst_reg->id = ++env->id_gen;
6158                         /* something was added to pkt_ptr, set range to zero */
6159                         if (smin_val < 0)
6160                                 dst_reg->raw = 0;
6161                 }
6162                 break;
6163         case BPF_AND:
6164         case BPF_OR:
6165         case BPF_XOR:
6166                 /* bitwise ops on pointers are troublesome, prohibit. */
6167                 verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
6168                         dst, bpf_alu_string[opcode >> 4]);
6169                 return -EACCES;
6170         default:
6171                 /* other operators (e.g. MUL,LSH) produce non-pointer results */
6172                 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
6173                         dst, bpf_alu_string[opcode >> 4]);
6174                 return -EACCES;
6175         }
6176
6177         if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
6178                 return -EINVAL;
6179
6180         __update_reg_bounds(dst_reg);
6181         __reg_deduce_bounds(dst_reg);
6182         __reg_bound_offset(dst_reg);
6183
6184         if (sanitize_check_bounds(env, insn, dst_reg) < 0)
6185                 return -EACCES;
6186         if (sanitize_needed(opcode)) {
6187                 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
6188                                        &info, true);
6189                 if (ret < 0)
6190                         return sanitize_err(env, insn, ret, off_reg, dst_reg);
6191         }
6192
6193         return 0;
6194 }
6195
6196 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
6197                                  struct bpf_reg_state *src_reg)
6198 {
6199         s32 smin_val = src_reg->s32_min_value;
6200         s32 smax_val = src_reg->s32_max_value;
6201         u32 umin_val = src_reg->u32_min_value;
6202         u32 umax_val = src_reg->u32_max_value;
6203
6204         if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
6205             signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
6206                 dst_reg->s32_min_value = S32_MIN;
6207                 dst_reg->s32_max_value = S32_MAX;
6208         } else {
6209                 dst_reg->s32_min_value += smin_val;
6210                 dst_reg->s32_max_value += smax_val;
6211         }
6212         if (dst_reg->u32_min_value + umin_val < umin_val ||
6213             dst_reg->u32_max_value + umax_val < umax_val) {
6214                 dst_reg->u32_min_value = 0;
6215                 dst_reg->u32_max_value = U32_MAX;
6216         } else {
6217                 dst_reg->u32_min_value += umin_val;
6218                 dst_reg->u32_max_value += umax_val;
6219         }
6220 }
6221
6222 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
6223                                struct bpf_reg_state *src_reg)
6224 {
6225         s64 smin_val = src_reg->smin_value;
6226         s64 smax_val = src_reg->smax_value;
6227         u64 umin_val = src_reg->umin_value;
6228         u64 umax_val = src_reg->umax_value;
6229
6230         if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
6231             signed_add_overflows(dst_reg->smax_value, smax_val)) {
6232                 dst_reg->smin_value = S64_MIN;
6233                 dst_reg->smax_value = S64_MAX;
6234         } else {
6235                 dst_reg->smin_value += smin_val;
6236                 dst_reg->smax_value += smax_val;
6237         }
6238         if (dst_reg->umin_value + umin_val < umin_val ||
6239             dst_reg->umax_value + umax_val < umax_val) {
6240                 dst_reg->umin_value = 0;
6241                 dst_reg->umax_value = U64_MAX;
6242         } else {
6243                 dst_reg->umin_value += umin_val;
6244                 dst_reg->umax_value += umax_val;
6245         }
6246 }
6247
6248 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
6249                                  struct bpf_reg_state *src_reg)
6250 {
6251         s32 smin_val = src_reg->s32_min_value;
6252         s32 smax_val = src_reg->s32_max_value;
6253         u32 umin_val = src_reg->u32_min_value;
6254         u32 umax_val = src_reg->u32_max_value;
6255
6256         if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
6257             signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
6258                 /* Overflow possible, we know nothing */
6259                 dst_reg->s32_min_value = S32_MIN;
6260                 dst_reg->s32_max_value = S32_MAX;
6261         } else {
6262                 dst_reg->s32_min_value -= smax_val;
6263                 dst_reg->s32_max_value -= smin_val;
6264         }
6265         if (dst_reg->u32_min_value < umax_val) {
6266                 /* Overflow possible, we know nothing */
6267                 dst_reg->u32_min_value = 0;
6268                 dst_reg->u32_max_value = U32_MAX;
6269         } else {
6270                 /* Cannot overflow (as long as bounds are consistent) */
6271                 dst_reg->u32_min_value -= umax_val;
6272                 dst_reg->u32_max_value -= umin_val;
6273         }
6274 }
6275
6276 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
6277                                struct bpf_reg_state *src_reg)
6278 {
6279         s64 smin_val = src_reg->smin_value;
6280         s64 smax_val = src_reg->smax_value;
6281         u64 umin_val = src_reg->umin_value;
6282         u64 umax_val = src_reg->umax_value;
6283
6284         if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
6285             signed_sub_overflows(dst_reg->smax_value, smin_val)) {
6286                 /* Overflow possible, we know nothing */
6287                 dst_reg->smin_value = S64_MIN;
6288                 dst_reg->smax_value = S64_MAX;
6289         } else {
6290                 dst_reg->smin_value -= smax_val;
6291                 dst_reg->smax_value -= smin_val;
6292         }
6293         if (dst_reg->umin_value < umax_val) {
6294                 /* Overflow possible, we know nothing */
6295                 dst_reg->umin_value = 0;
6296                 dst_reg->umax_value = U64_MAX;
6297         } else {
6298                 /* Cannot overflow (as long as bounds are consistent) */
6299                 dst_reg->umin_value -= umax_val;
6300                 dst_reg->umax_value -= umin_val;
6301         }
6302 }
6303
6304 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
6305                                  struct bpf_reg_state *src_reg)
6306 {
6307         s32 smin_val = src_reg->s32_min_value;
6308         u32 umin_val = src_reg->u32_min_value;
6309         u32 umax_val = src_reg->u32_max_value;
6310
6311         if (smin_val < 0 || dst_reg->s32_min_value < 0) {
6312                 /* Ain't nobody got time to multiply that sign */
6313                 __mark_reg32_unbounded(dst_reg);
6314                 return;
6315         }
6316         /* Both values are positive, so we can work with unsigned and
6317          * copy the result to signed (unless it exceeds S32_MAX).
6318          */
6319         if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
6320                 /* Potential overflow, we know nothing */
6321                 __mark_reg32_unbounded(dst_reg);
6322                 return;
6323         }
6324         dst_reg->u32_min_value *= umin_val;
6325         dst_reg->u32_max_value *= umax_val;
6326         if (dst_reg->u32_max_value > S32_MAX) {
6327                 /* Overflow possible, we know nothing */
6328                 dst_reg->s32_min_value = S32_MIN;
6329                 dst_reg->s32_max_value = S32_MAX;
6330         } else {
6331                 dst_reg->s32_min_value = dst_reg->u32_min_value;
6332                 dst_reg->s32_max_value = dst_reg->u32_max_value;
6333         }
6334 }
6335
6336 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
6337                                struct bpf_reg_state *src_reg)
6338 {
6339         s64 smin_val = src_reg->smin_value;
6340         u64 umin_val = src_reg->umin_value;
6341         u64 umax_val = src_reg->umax_value;
6342
6343         if (smin_val < 0 || dst_reg->smin_value < 0) {
6344                 /* Ain't nobody got time to multiply that sign */
6345                 __mark_reg64_unbounded(dst_reg);
6346                 return;
6347         }
6348         /* Both values are positive, so we can work with unsigned and
6349          * copy the result to signed (unless it exceeds S64_MAX).
6350          */
6351         if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
6352                 /* Potential overflow, we know nothing */
6353                 __mark_reg64_unbounded(dst_reg);
6354                 return;
6355         }
6356         dst_reg->umin_value *= umin_val;
6357         dst_reg->umax_value *= umax_val;
6358         if (dst_reg->umax_value > S64_MAX) {
6359                 /* Overflow possible, we know nothing */
6360                 dst_reg->smin_value = S64_MIN;
6361                 dst_reg->smax_value = S64_MAX;
6362         } else {
6363                 dst_reg->smin_value = dst_reg->umin_value;
6364                 dst_reg->smax_value = dst_reg->umax_value;
6365         }
6366 }
6367
6368 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
6369                                  struct bpf_reg_state *src_reg)
6370 {
6371         bool src_known = tnum_subreg_is_const(src_reg->var_off);
6372         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6373         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6374         s32 smin_val = src_reg->s32_min_value;
6375         u32 umax_val = src_reg->u32_max_value;
6376
6377         if (src_known && dst_known) {
6378                 __mark_reg32_known(dst_reg, var32_off.value);
6379                 return;
6380         }
6381
6382         /* We get our minimum from the var_off, since that's inherently
6383          * bitwise.  Our maximum is the minimum of the operands' maxima.
6384          */
6385         dst_reg->u32_min_value = var32_off.value;
6386         dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
6387         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6388                 /* Lose signed bounds when ANDing negative numbers,
6389                  * ain't nobody got time for that.
6390                  */
6391                 dst_reg->s32_min_value = S32_MIN;
6392                 dst_reg->s32_max_value = S32_MAX;
6393         } else {
6394                 /* ANDing two positives gives a positive, so safe to
6395                  * cast result into s64.
6396                  */
6397                 dst_reg->s32_min_value = dst_reg->u32_min_value;
6398                 dst_reg->s32_max_value = dst_reg->u32_max_value;
6399         }
6400 }
6401
6402 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
6403                                struct bpf_reg_state *src_reg)
6404 {
6405         bool src_known = tnum_is_const(src_reg->var_off);
6406         bool dst_known = tnum_is_const(dst_reg->var_off);
6407         s64 smin_val = src_reg->smin_value;
6408         u64 umax_val = src_reg->umax_value;
6409
6410         if (src_known && dst_known) {
6411                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
6412                 return;
6413         }
6414
6415         /* We get our minimum from the var_off, since that's inherently
6416          * bitwise.  Our maximum is the minimum of the operands' maxima.
6417          */
6418         dst_reg->umin_value = dst_reg->var_off.value;
6419         dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
6420         if (dst_reg->smin_value < 0 || smin_val < 0) {
6421                 /* Lose signed bounds when ANDing negative numbers,
6422                  * ain't nobody got time for that.
6423                  */
6424                 dst_reg->smin_value = S64_MIN;
6425                 dst_reg->smax_value = S64_MAX;
6426         } else {
6427                 /* ANDing two positives gives a positive, so safe to
6428                  * cast result into s64.
6429                  */
6430                 dst_reg->smin_value = dst_reg->umin_value;
6431                 dst_reg->smax_value = dst_reg->umax_value;
6432         }
6433         /* We may learn something more from the var_off */
6434         __update_reg_bounds(dst_reg);
6435 }
6436
6437 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
6438                                 struct bpf_reg_state *src_reg)
6439 {
6440         bool src_known = tnum_subreg_is_const(src_reg->var_off);
6441         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6442         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6443         s32 smin_val = src_reg->s32_min_value;
6444         u32 umin_val = src_reg->u32_min_value;
6445
6446         if (src_known && dst_known) {
6447                 __mark_reg32_known(dst_reg, var32_off.value);
6448                 return;
6449         }
6450
6451         /* We get our maximum from the var_off, and our minimum is the
6452          * maximum of the operands' minima
6453          */
6454         dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
6455         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6456         if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6457                 /* Lose signed bounds when ORing negative numbers,
6458                  * ain't nobody got time for that.
6459                  */
6460                 dst_reg->s32_min_value = S32_MIN;
6461                 dst_reg->s32_max_value = S32_MAX;
6462         } else {
6463                 /* ORing two positives gives a positive, so safe to
6464                  * cast result into s64.
6465                  */
6466                 dst_reg->s32_min_value = dst_reg->u32_min_value;
6467                 dst_reg->s32_max_value = dst_reg->u32_max_value;
6468         }
6469 }
6470
6471 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
6472                               struct bpf_reg_state *src_reg)
6473 {
6474         bool src_known = tnum_is_const(src_reg->var_off);
6475         bool dst_known = tnum_is_const(dst_reg->var_off);
6476         s64 smin_val = src_reg->smin_value;
6477         u64 umin_val = src_reg->umin_value;
6478
6479         if (src_known && dst_known) {
6480                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
6481                 return;
6482         }
6483
6484         /* We get our maximum from the var_off, and our minimum is the
6485          * maximum of the operands' minima
6486          */
6487         dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
6488         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6489         if (dst_reg->smin_value < 0 || smin_val < 0) {
6490                 /* Lose signed bounds when ORing negative numbers,
6491                  * ain't nobody got time for that.
6492                  */
6493                 dst_reg->smin_value = S64_MIN;
6494                 dst_reg->smax_value = S64_MAX;
6495         } else {
6496                 /* ORing two positives gives a positive, so safe to
6497                  * cast result into s64.
6498                  */
6499                 dst_reg->smin_value = dst_reg->umin_value;
6500                 dst_reg->smax_value = dst_reg->umax_value;
6501         }
6502         /* We may learn something more from the var_off */
6503         __update_reg_bounds(dst_reg);
6504 }
6505
6506 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
6507                                  struct bpf_reg_state *src_reg)
6508 {
6509         bool src_known = tnum_subreg_is_const(src_reg->var_off);
6510         bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6511         struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6512         s32 smin_val = src_reg->s32_min_value;
6513
6514         if (src_known && dst_known) {
6515                 __mark_reg32_known(dst_reg, var32_off.value);
6516                 return;
6517         }
6518
6519         /* We get both minimum and maximum from the var32_off. */
6520         dst_reg->u32_min_value = var32_off.value;
6521         dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6522
6523         if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
6524                 /* XORing two positive sign numbers gives a positive,
6525                  * so safe to cast u32 result into s32.
6526                  */
6527                 dst_reg->s32_min_value = dst_reg->u32_min_value;
6528                 dst_reg->s32_max_value = dst_reg->u32_max_value;
6529         } else {
6530                 dst_reg->s32_min_value = S32_MIN;
6531                 dst_reg->s32_max_value = S32_MAX;
6532         }
6533 }
6534
6535 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
6536                                struct bpf_reg_state *src_reg)
6537 {
6538         bool src_known = tnum_is_const(src_reg->var_off);
6539         bool dst_known = tnum_is_const(dst_reg->var_off);
6540         s64 smin_val = src_reg->smin_value;
6541
6542         if (src_known && dst_known) {
6543                 /* dst_reg->var_off.value has been updated earlier */
6544                 __mark_reg_known(dst_reg, dst_reg->var_off.value);
6545                 return;
6546         }
6547
6548         /* We get both minimum and maximum from the var_off. */
6549         dst_reg->umin_value = dst_reg->var_off.value;
6550         dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6551
6552         if (dst_reg->smin_value >= 0 && smin_val >= 0) {
6553                 /* XORing two positive sign numbers gives a positive,
6554                  * so safe to cast u64 result into s64.
6555                  */
6556                 dst_reg->smin_value = dst_reg->umin_value;
6557                 dst_reg->smax_value = dst_reg->umax_value;
6558         } else {
6559                 dst_reg->smin_value = S64_MIN;
6560                 dst_reg->smax_value = S64_MAX;
6561         }
6562
6563         __update_reg_bounds(dst_reg);
6564 }
6565
6566 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
6567                                    u64 umin_val, u64 umax_val)
6568 {
6569         /* We lose all sign bit information (except what we can pick
6570          * up from var_off)
6571          */
6572         dst_reg->s32_min_value = S32_MIN;
6573         dst_reg->s32_max_value = S32_MAX;
6574         /* If we might shift our top bit out, then we know nothing */
6575         if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
6576                 dst_reg->u32_min_value = 0;
6577                 dst_reg->u32_max_value = U32_MAX;
6578         } else {
6579                 dst_reg->u32_min_value <<= umin_val;
6580                 dst_reg->u32_max_value <<= umax_val;
6581         }
6582 }
6583
6584 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
6585                                  struct bpf_reg_state *src_reg)
6586 {
6587         u32 umax_val = src_reg->u32_max_value;
6588         u32 umin_val = src_reg->u32_min_value;
6589         /* u32 alu operation will zext upper bits */
6590         struct tnum subreg = tnum_subreg(dst_reg->var_off);
6591
6592         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6593         dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
6594         /* Not required but being careful mark reg64 bounds as unknown so
6595          * that we are forced to pick them up from tnum and zext later and
6596          * if some path skips this step we are still safe.
6597          */
6598         __mark_reg64_unbounded(dst_reg);
6599         __update_reg32_bounds(dst_reg);
6600 }
6601
6602 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
6603                                    u64 umin_val, u64 umax_val)
6604 {
6605         /* Special case <<32 because it is a common compiler pattern to sign
6606          * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
6607          * positive we know this shift will also be positive so we can track
6608          * bounds correctly. Otherwise we lose all sign bit information except
6609          * what we can pick up from var_off. Perhaps we can generalize this
6610          * later to shifts of any length.
6611          */
6612         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
6613                 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
6614         else
6615                 dst_reg->smax_value = S64_MAX;
6616
6617         if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
6618                 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
6619         else
6620                 dst_reg->smin_value = S64_MIN;
6621
6622         /* If we might shift our top bit out, then we know nothing */
6623         if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
6624                 dst_reg->umin_value = 0;
6625                 dst_reg->umax_value = U64_MAX;
6626         } else {
6627                 dst_reg->umin_value <<= umin_val;
6628                 dst_reg->umax_value <<= umax_val;
6629         }
6630 }
6631
6632 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
6633                                struct bpf_reg_state *src_reg)
6634 {
6635         u64 umax_val = src_reg->umax_value;
6636         u64 umin_val = src_reg->umin_value;
6637
6638         /* scalar64 calc uses 32bit unshifted bounds so must be called first */
6639         __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
6640         __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6641
6642         dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
6643         /* We may learn something more from the var_off */
6644         __update_reg_bounds(dst_reg);
6645 }
6646
6647 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
6648                                  struct bpf_reg_state *src_reg)
6649 {
6650         struct tnum subreg = tnum_subreg(dst_reg->var_off);
6651         u32 umax_val = src_reg->u32_max_value;
6652         u32 umin_val = src_reg->u32_min_value;
6653
6654         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6655          * be negative, then either:
6656          * 1) src_reg might be zero, so the sign bit of the result is
6657          *    unknown, so we lose our signed bounds
6658          * 2) it's known negative, thus the unsigned bounds capture the
6659          *    signed bounds
6660          * 3) the signed bounds cross zero, so they tell us nothing
6661          *    about the result
6662          * If the value in dst_reg is known nonnegative, then again the
6663          * unsigned bounts capture the signed bounds.
6664          * Thus, in all cases it suffices to blow away our signed bounds
6665          * and rely on inferring new ones from the unsigned bounds and
6666          * var_off of the result.
6667          */
6668         dst_reg->s32_min_value = S32_MIN;
6669         dst_reg->s32_max_value = S32_MAX;
6670
6671         dst_reg->var_off = tnum_rshift(subreg, umin_val);
6672         dst_reg->u32_min_value >>= umax_val;
6673         dst_reg->u32_max_value >>= umin_val;
6674
6675         __mark_reg64_unbounded(dst_reg);
6676         __update_reg32_bounds(dst_reg);
6677 }
6678
6679 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
6680                                struct bpf_reg_state *src_reg)
6681 {
6682         u64 umax_val = src_reg->umax_value;
6683         u64 umin_val = src_reg->umin_value;
6684
6685         /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6686          * be negative, then either:
6687          * 1) src_reg might be zero, so the sign bit of the result is
6688          *    unknown, so we lose our signed bounds
6689          * 2) it's known negative, thus the unsigned bounds capture the
6690          *    signed bounds
6691          * 3) the signed bounds cross zero, so they tell us nothing
6692          *    about the result
6693          * If the value in dst_reg is known nonnegative, then again the
6694          * unsigned bounts capture the signed bounds.
6695          * Thus, in all cases it suffices to blow away our signed bounds
6696          * and rely on inferring new ones from the unsigned bounds and
6697          * var_off of the result.
6698          */
6699         dst_reg->smin_value = S64_MIN;
6700         dst_reg->smax_value = S64_MAX;
6701         dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
6702         dst_reg->umin_value >>= umax_val;
6703         dst_reg->umax_value >>= umin_val;
6704
6705         /* Its not easy to operate on alu32 bounds here because it depends
6706          * on bits being shifted in. Take easy way out and mark unbounded
6707          * so we can recalculate later from tnum.
6708          */
6709         __mark_reg32_unbounded(dst_reg);
6710         __update_reg_bounds(dst_reg);
6711 }
6712
6713 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
6714                                   struct bpf_reg_state *src_reg)
6715 {
6716         u64 umin_val = src_reg->u32_min_value;
6717
6718         /* Upon reaching here, src_known is true and
6719          * umax_val is equal to umin_val.
6720          */
6721         dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
6722         dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
6723
6724         dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
6725
6726         /* blow away the dst_reg umin_value/umax_value and rely on
6727          * dst_reg var_off to refine the result.
6728          */
6729         dst_reg->u32_min_value = 0;
6730         dst_reg->u32_max_value = U32_MAX;
6731
6732         __mark_reg64_unbounded(dst_reg);
6733         __update_reg32_bounds(dst_reg);
6734 }
6735
6736 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
6737                                 struct bpf_reg_state *src_reg)
6738 {
6739         u64 umin_val = src_reg->umin_value;
6740
6741         /* Upon reaching here, src_known is true and umax_val is equal
6742          * to umin_val.
6743          */
6744         dst_reg->smin_value >>= umin_val;
6745         dst_reg->smax_value >>= umin_val;
6746
6747         dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
6748
6749         /* blow away the dst_reg umin_value/umax_value and rely on
6750          * dst_reg var_off to refine the result.
6751          */
6752         dst_reg->umin_value = 0;
6753         dst_reg->umax_value = U64_MAX;
6754
6755         /* Its not easy to operate on alu32 bounds here because it depends
6756          * on bits being shifted in from upper 32-bits. Take easy way out
6757          * and mark unbounded so we can recalculate later from tnum.
6758          */
6759         __mark_reg32_unbounded(dst_reg);
6760         __update_reg_bounds(dst_reg);
6761 }
6762
6763 /* WARNING: This function does calculations on 64-bit values, but the actual
6764  * execution may occur on 32-bit values. Therefore, things like bitshifts
6765  * need extra checks in the 32-bit case.
6766  */
6767 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
6768                                       struct bpf_insn *insn,
6769                                       struct bpf_reg_state *dst_reg,
6770                                       struct bpf_reg_state src_reg)
6771 {
6772         struct bpf_reg_state *regs = cur_regs(env);
6773         u8 opcode = BPF_OP(insn->code);
6774         bool src_known;
6775         s64 smin_val, smax_val;
6776         u64 umin_val, umax_val;
6777         s32 s32_min_val, s32_max_val;
6778         u32 u32_min_val, u32_max_val;
6779         u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
6780         bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
6781         int ret;
6782
6783         smin_val = src_reg.smin_value;
6784         smax_val = src_reg.smax_value;
6785         umin_val = src_reg.umin_value;
6786         umax_val = src_reg.umax_value;
6787
6788         s32_min_val = src_reg.s32_min_value;
6789         s32_max_val = src_reg.s32_max_value;
6790         u32_min_val = src_reg.u32_min_value;
6791         u32_max_val = src_reg.u32_max_value;
6792
6793         if (alu32) {
6794                 src_known = tnum_subreg_is_const(src_reg.var_off);
6795                 if ((src_known &&
6796                      (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
6797                     s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
6798                         /* Taint dst register if offset had invalid bounds
6799                          * derived from e.g. dead branches.
6800                          */
6801                         __mark_reg_unknown(env, dst_reg);
6802                         return 0;
6803                 }
6804         } else {
6805                 src_known = tnum_is_const(src_reg.var_off);
6806                 if ((src_known &&
6807                      (smin_val != smax_val || umin_val != umax_val)) ||
6808                     smin_val > smax_val || umin_val > umax_val) {
6809                         /* Taint dst register if offset had invalid bounds
6810                          * derived from e.g. dead branches.
6811                          */
6812                         __mark_reg_unknown(env, dst_reg);
6813                         return 0;
6814                 }
6815         }
6816
6817         if (!src_known &&
6818             opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
6819                 __mark_reg_unknown(env, dst_reg);
6820                 return 0;
6821         }
6822
6823         if (sanitize_needed(opcode)) {
6824                 ret = sanitize_val_alu(env, insn);
6825                 if (ret < 0)
6826                         return sanitize_err(env, insn, ret, NULL, NULL);
6827         }
6828
6829         /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
6830          * There are two classes of instructions: The first class we track both
6831          * alu32 and alu64 sign/unsigned bounds independently this provides the
6832          * greatest amount of precision when alu operations are mixed with jmp32
6833          * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_ADD,
6834          * and BPF_OR. This is possible because these ops have fairly easy to
6835          * understand and calculate behavior in both 32-bit and 64-bit alu ops.
6836          * See alu32 verifier tests for examples. The second class of
6837          * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
6838          * with regards to tracking sign/unsigned bounds because the bits may
6839          * cross subreg boundaries in the alu64 case. When this happens we mark
6840          * the reg unbounded in the subreg bound space and use the resulting
6841          * tnum to calculate an approximation of the sign/unsigned bounds.
6842          */
6843         switch (opcode) {
6844         case BPF_ADD:
6845                 scalar32_min_max_add(dst_reg, &src_reg);
6846                 scalar_min_max_add(dst_reg, &src_reg);
6847                 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
6848                 break;
6849         case BPF_SUB:
6850                 scalar32_min_max_sub(dst_reg, &src_reg);
6851                 scalar_min_max_sub(dst_reg, &src_reg);
6852                 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
6853                 break;
6854         case BPF_MUL:
6855                 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
6856                 scalar32_min_max_mul(dst_reg, &src_reg);
6857                 scalar_min_max_mul(dst_reg, &src_reg);
6858                 break;
6859         case BPF_AND:
6860                 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
6861                 scalar32_min_max_and(dst_reg, &src_reg);
6862                 scalar_min_max_and(dst_reg, &src_reg);
6863                 break;
6864         case BPF_OR:
6865                 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
6866                 scalar32_min_max_or(dst_reg, &src_reg);
6867                 scalar_min_max_or(dst_reg, &src_reg);
6868                 break;
6869         case BPF_XOR:
6870                 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
6871                 scalar32_min_max_xor(dst_reg, &src_reg);
6872                 scalar_min_max_xor(dst_reg, &src_reg);
6873                 break;
6874         case BPF_LSH:
6875                 if (umax_val >= insn_bitness) {
6876                         /* Shifts greater than 31 or 63 are undefined.
6877                          * This includes shifts by a negative number.
6878                          */
6879                         mark_reg_unknown(env, regs, insn->dst_reg);
6880                         break;
6881                 }
6882                 if (alu32)
6883                         scalar32_min_max_lsh(dst_reg, &src_reg);
6884                 else
6885                         scalar_min_max_lsh(dst_reg, &src_reg);
6886                 break;
6887         case BPF_RSH:
6888                 if (umax_val >= insn_bitness) {
6889                         /* Shifts greater than 31 or 63 are undefined.
6890                          * This includes shifts by a negative number.
6891                          */
6892                         mark_reg_unknown(env, regs, insn->dst_reg);
6893                         break;
6894                 }
6895                 if (alu32)
6896                         scalar32_min_max_rsh(dst_reg, &src_reg);
6897                 else
6898                         scalar_min_max_rsh(dst_reg, &src_reg);
6899                 break;
6900         case BPF_ARSH:
6901                 if (umax_val >= insn_bitness) {
6902                         /* Shifts greater than 31 or 63 are undefined.
6903                          * This includes shifts by a negative number.
6904                          */
6905                         mark_reg_unknown(env, regs, insn->dst_reg);
6906                         break;
6907                 }
6908                 if (alu32)
6909                         scalar32_min_max_arsh(dst_reg, &src_reg);
6910                 else
6911                         scalar_min_max_arsh(dst_reg, &src_reg);
6912                 break;
6913         default:
6914                 mark_reg_unknown(env, regs, insn->dst_reg);
6915                 break;
6916         }
6917
6918         /* ALU32 ops are zero extended into 64bit register */
6919         if (alu32)
6920                 zext_32_to_64(dst_reg);
6921
6922         __update_reg_bounds(dst_reg);
6923         __reg_deduce_bounds(dst_reg);
6924         __reg_bound_offset(dst_reg);
6925         return 0;
6926 }
6927
6928 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
6929  * and var_off.
6930  */
6931 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
6932                                    struct bpf_insn *insn)
6933 {
6934         struct bpf_verifier_state *vstate = env->cur_state;
6935         struct bpf_func_state *state = vstate->frame[vstate->curframe];
6936         struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
6937         struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
6938         u8 opcode = BPF_OP(insn->code);
6939         int err;
6940
6941         dst_reg = &regs[insn->dst_reg];
6942         src_reg = NULL;
6943         if (dst_reg->type != SCALAR_VALUE)
6944                 ptr_reg = dst_reg;
6945         else
6946                 /* Make sure ID is cleared otherwise dst_reg min/max could be
6947                  * incorrectly propagated into other registers by find_equal_scalars()
6948                  */
6949                 dst_reg->id = 0;
6950         if (BPF_SRC(insn->code) == BPF_X) {
6951                 src_reg = &regs[insn->src_reg];
6952                 if (src_reg->type != SCALAR_VALUE) {
6953                         if (dst_reg->type != SCALAR_VALUE) {
6954                                 /* Combining two pointers by any ALU op yields
6955                                  * an arbitrary scalar. Disallow all math except
6956                                  * pointer subtraction
6957                                  */
6958                                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
6959                                         mark_reg_unknown(env, regs, insn->dst_reg);
6960                                         return 0;
6961                                 }
6962                                 verbose(env, "R%d pointer %s pointer prohibited\n",
6963                                         insn->dst_reg,
6964                                         bpf_alu_string[opcode >> 4]);
6965                                 return -EACCES;
6966                         } else {
6967                                 /* scalar += pointer
6968                                  * This is legal, but we have to reverse our
6969                                  * src/dest handling in computing the range
6970                                  */
6971                                 err = mark_chain_precision(env, insn->dst_reg);
6972                                 if (err)
6973                                         return err;
6974                                 return adjust_ptr_min_max_vals(env, insn,
6975                                                                src_reg, dst_reg);
6976                         }
6977                 } else if (ptr_reg) {
6978                         /* pointer += scalar */
6979                         err = mark_chain_precision(env, insn->src_reg);
6980                         if (err)
6981                                 return err;
6982                         return adjust_ptr_min_max_vals(env, insn,
6983                                                        dst_reg, src_reg);
6984                 }
6985         } else {
6986                 /* Pretend the src is a reg with a known value, since we only
6987                  * need to be able to read from this state.
6988                  */
6989                 off_reg.type = SCALAR_VALUE;
6990                 __mark_reg_known(&off_reg, insn->imm);
6991                 src_reg = &off_reg;
6992                 if (ptr_reg) /* pointer += K */
6993                         return adjust_ptr_min_max_vals(env, insn,
6994                                                        ptr_reg, src_reg);
6995         }
6996
6997         /* Got here implies adding two SCALAR_VALUEs */
6998         if (WARN_ON_ONCE(ptr_reg)) {
6999                 print_verifier_state(env, state);
7000                 verbose(env, "verifier internal error: unexpected ptr_reg\n");
7001                 return -EINVAL;
7002         }
7003         if (WARN_ON(!src_reg)) {
7004                 print_verifier_state(env, state);
7005                 verbose(env, "verifier internal error: no src_reg\n");
7006                 return -EINVAL;
7007         }
7008         return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
7009 }
7010
7011 /* check validity of 32-bit and 64-bit arithmetic operations */
7012 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
7013 {
7014         struct bpf_reg_state *regs = cur_regs(env);
7015         u8 opcode = BPF_OP(insn->code);
7016         int err;
7017
7018         if (opcode == BPF_END || opcode == BPF_NEG) {
7019                 if (opcode == BPF_NEG) {
7020                         if (BPF_SRC(insn->code) != 0 ||
7021                             insn->src_reg != BPF_REG_0 ||
7022                             insn->off != 0 || insn->imm != 0) {
7023                                 verbose(env, "BPF_NEG uses reserved fields\n");
7024                                 return -EINVAL;
7025                         }
7026                 } else {
7027                         if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
7028                             (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
7029                             BPF_CLASS(insn->code) == BPF_ALU64) {
7030                                 verbose(env, "BPF_END uses reserved fields\n");
7031                                 return -EINVAL;
7032                         }
7033                 }
7034
7035                 /* check src operand */
7036                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7037                 if (err)
7038                         return err;
7039
7040                 if (is_pointer_value(env, insn->dst_reg)) {
7041                         verbose(env, "R%d pointer arithmetic prohibited\n",
7042                                 insn->dst_reg);
7043                         return -EACCES;
7044                 }
7045
7046                 /* check dest operand */
7047                 err = check_reg_arg(env, insn->dst_reg, DST_OP);
7048                 if (err)
7049                         return err;
7050
7051         } else if (opcode == BPF_MOV) {
7052
7053                 if (BPF_SRC(insn->code) == BPF_X) {
7054                         if (insn->imm != 0 || insn->off != 0) {
7055                                 verbose(env, "BPF_MOV uses reserved fields\n");
7056                                 return -EINVAL;
7057                         }
7058
7059                         /* check src operand */
7060                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
7061                         if (err)
7062                                 return err;
7063                 } else {
7064                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7065                                 verbose(env, "BPF_MOV uses reserved fields\n");
7066                                 return -EINVAL;
7067                         }
7068                 }
7069
7070                 /* check dest operand, mark as required later */
7071                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7072                 if (err)
7073                         return err;
7074
7075                 if (BPF_SRC(insn->code) == BPF_X) {
7076                         struct bpf_reg_state *src_reg = regs + insn->src_reg;
7077                         struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
7078
7079                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
7080                                 /* case: R1 = R2
7081                                  * copy register state to dest reg
7082                                  */
7083                                 if (src_reg->type == SCALAR_VALUE && !src_reg->id)
7084                                         /* Assign src and dst registers the same ID
7085                                          * that will be used by find_equal_scalars()
7086                                          * to propagate min/max range.
7087                                          */
7088                                         src_reg->id = ++env->id_gen;
7089                                 *dst_reg = *src_reg;
7090                                 dst_reg->live |= REG_LIVE_WRITTEN;
7091                                 dst_reg->subreg_def = DEF_NOT_SUBREG;
7092                         } else {
7093                                 /* R1 = (u32) R2 */
7094                                 if (is_pointer_value(env, insn->src_reg)) {
7095                                         verbose(env,
7096                                                 "R%d partial copy of pointer\n",
7097                                                 insn->src_reg);
7098                                         return -EACCES;
7099                                 } else if (src_reg->type == SCALAR_VALUE) {
7100                                         *dst_reg = *src_reg;
7101                                         /* Make sure ID is cleared otherwise
7102                                          * dst_reg min/max could be incorrectly
7103                                          * propagated into src_reg by find_equal_scalars()
7104                                          */
7105                                         dst_reg->id = 0;
7106                                         dst_reg->live |= REG_LIVE_WRITTEN;
7107                                         dst_reg->subreg_def = env->insn_idx + 1;
7108                                 } else {
7109                                         mark_reg_unknown(env, regs,
7110                                                          insn->dst_reg);
7111                                 }
7112                                 zext_32_to_64(dst_reg);
7113                         }
7114                 } else {
7115                         /* case: R = imm
7116                          * remember the value we stored into this reg
7117                          */
7118                         /* clear any state __mark_reg_known doesn't set */
7119                         mark_reg_unknown(env, regs, insn->dst_reg);
7120                         regs[insn->dst_reg].type = SCALAR_VALUE;
7121                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
7122                                 __mark_reg_known(regs + insn->dst_reg,
7123                                                  insn->imm);
7124                         } else {
7125                                 __mark_reg_known(regs + insn->dst_reg,
7126                                                  (u32)insn->imm);
7127                         }
7128                 }
7129
7130         } else if (opcode > BPF_END) {
7131                 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
7132                 return -EINVAL;
7133
7134         } else {        /* all other ALU ops: and, sub, xor, add, ... */
7135
7136                 if (BPF_SRC(insn->code) == BPF_X) {
7137                         if (insn->imm != 0 || insn->off != 0) {
7138                                 verbose(env, "BPF_ALU uses reserved fields\n");
7139                                 return -EINVAL;
7140                         }
7141                         /* check src1 operand */
7142                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
7143                         if (err)
7144                                 return err;
7145                 } else {
7146                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7147                                 verbose(env, "BPF_ALU uses reserved fields\n");
7148                                 return -EINVAL;
7149                         }
7150                 }
7151
7152                 /* check src2 operand */
7153                 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7154                 if (err)
7155                         return err;
7156
7157                 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
7158                     BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
7159                         verbose(env, "div by zero\n");
7160                         return -EINVAL;
7161                 }
7162
7163                 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
7164                      opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
7165                         int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
7166
7167                         if (insn->imm < 0 || insn->imm >= size) {
7168                                 verbose(env, "invalid shift %d\n", insn->imm);
7169                                 return -EINVAL;
7170                         }
7171                 }
7172
7173                 /* check dest operand */
7174                 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7175                 if (err)
7176                         return err;
7177
7178                 return adjust_reg_min_max_vals(env, insn);
7179         }
7180
7181         return 0;
7182 }
7183
7184 static void __find_good_pkt_pointers(struct bpf_func_state *state,
7185                                      struct bpf_reg_state *dst_reg,
7186                                      enum bpf_reg_type type, u16 new_range)
7187 {
7188         struct bpf_reg_state *reg;
7189         int i;
7190
7191         for (i = 0; i < MAX_BPF_REG; i++) {
7192                 reg = &state->regs[i];
7193                 if (reg->type == type && reg->id == dst_reg->id)
7194                         /* keep the maximum range already checked */
7195                         reg->range = max(reg->range, new_range);
7196         }
7197
7198         bpf_for_each_spilled_reg(i, state, reg) {
7199                 if (!reg)
7200                         continue;
7201                 if (reg->type == type && reg->id == dst_reg->id)
7202                         reg->range = max(reg->range, new_range);
7203         }
7204 }
7205
7206 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
7207                                    struct bpf_reg_state *dst_reg,
7208                                    enum bpf_reg_type type,
7209                                    bool range_right_open)
7210 {
7211         u16 new_range;
7212         int i;
7213
7214         if (dst_reg->off < 0 ||
7215             (dst_reg->off == 0 && range_right_open))
7216                 /* This doesn't give us any range */
7217                 return;
7218
7219         if (dst_reg->umax_value > MAX_PACKET_OFF ||
7220             dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
7221                 /* Risk of overflow.  For instance, ptr + (1<<63) may be less
7222                  * than pkt_end, but that's because it's also less than pkt.
7223                  */
7224                 return;
7225
7226         new_range = dst_reg->off;
7227         if (range_right_open)
7228                 new_range--;
7229
7230         /* Examples for register markings:
7231          *
7232          * pkt_data in dst register:
7233          *
7234          *   r2 = r3;
7235          *   r2 += 8;
7236          *   if (r2 > pkt_end) goto <handle exception>
7237          *   <access okay>
7238          *
7239          *   r2 = r3;
7240          *   r2 += 8;
7241          *   if (r2 < pkt_end) goto <access okay>
7242          *   <handle exception>
7243          *
7244          *   Where:
7245          *     r2 == dst_reg, pkt_end == src_reg
7246          *     r2=pkt(id=n,off=8,r=0)
7247          *     r3=pkt(id=n,off=0,r=0)
7248          *
7249          * pkt_data in src register:
7250          *
7251          *   r2 = r3;
7252          *   r2 += 8;
7253          *   if (pkt_end >= r2) goto <access okay>
7254          *   <handle exception>
7255          *
7256          *   r2 = r3;
7257          *   r2 += 8;
7258          *   if (pkt_end <= r2) goto <handle exception>
7259          *   <access okay>
7260          *
7261          *   Where:
7262          *     pkt_end == dst_reg, r2 == src_reg
7263          *     r2=pkt(id=n,off=8,r=0)
7264          *     r3=pkt(id=n,off=0,r=0)
7265          *
7266          * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
7267          * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
7268          * and [r3, r3 + 8-1) respectively is safe to access depending on
7269          * the check.
7270          */
7271
7272         /* If our ids match, then we must have the same max_value.  And we
7273          * don't care about the other reg's fixed offset, since if it's too big
7274          * the range won't allow anything.
7275          * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
7276          */
7277         for (i = 0; i <= vstate->curframe; i++)
7278                 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type,
7279                                          new_range);
7280 }
7281
7282 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
7283 {
7284         struct tnum subreg = tnum_subreg(reg->var_off);
7285         s32 sval = (s32)val;
7286
7287         switch (opcode) {
7288         case BPF_JEQ:
7289                 if (tnum_is_const(subreg))
7290                         return !!tnum_equals_const(subreg, val);
7291                 break;
7292         case BPF_JNE:
7293                 if (tnum_is_const(subreg))
7294                         return !tnum_equals_const(subreg, val);
7295                 break;
7296         case BPF_JSET:
7297                 if ((~subreg.mask & subreg.value) & val)
7298                         return 1;
7299                 if (!((subreg.mask | subreg.value) & val))
7300                         return 0;
7301                 break;
7302         case BPF_JGT:
7303                 if (reg->u32_min_value > val)
7304                         return 1;
7305                 else if (reg->u32_max_value <= val)
7306                         return 0;
7307                 break;
7308         case BPF_JSGT:
7309                 if (reg->s32_min_value > sval)
7310                         return 1;
7311                 else if (reg->s32_max_value <= sval)
7312                         return 0;
7313                 break;
7314         case BPF_JLT:
7315                 if (reg->u32_max_value < val)
7316                         return 1;
7317                 else if (reg->u32_min_value >= val)
7318                         return 0;
7319                 break;
7320         case BPF_JSLT:
7321                 if (reg->s32_max_value < sval)
7322                         return 1;
7323                 else if (reg->s32_min_value >= sval)
7324                         return 0;
7325                 break;
7326         case BPF_JGE:
7327                 if (reg->u32_min_value >= val)
7328                         return 1;
7329                 else if (reg->u32_max_value < val)
7330                         return 0;
7331                 break;
7332         case BPF_JSGE:
7333                 if (reg->s32_min_value >= sval)
7334                         return 1;
7335                 else if (reg->s32_max_value < sval)
7336                         return 0;
7337                 break;
7338         case BPF_JLE:
7339                 if (reg->u32_max_value <= val)
7340                         return 1;
7341                 else if (reg->u32_min_value > val)
7342                         return 0;
7343                 break;
7344         case BPF_JSLE:
7345                 if (reg->s32_max_value <= sval)
7346                         return 1;
7347                 else if (reg->s32_min_value > sval)
7348                         return 0;
7349                 break;
7350         }
7351
7352         return -1;
7353 }
7354
7355
7356 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
7357 {
7358         s64 sval = (s64)val;
7359
7360         switch (opcode) {
7361         case BPF_JEQ:
7362                 if (tnum_is_const(reg->var_off))
7363                         return !!tnum_equals_const(reg->var_off, val);
7364                 break;
7365         case BPF_JNE:
7366                 if (tnum_is_const(reg->var_off))
7367                         return !tnum_equals_const(reg->var_off, val);
7368                 break;
7369         case BPF_JSET:
7370                 if ((~reg->var_off.mask & reg->var_off.value) & val)
7371                         return 1;
7372                 if (!((reg->var_off.mask | reg->var_off.value) & val))
7373                         return 0;
7374                 break;
7375         case BPF_JGT:
7376                 if (reg->umin_value > val)
7377                         return 1;
7378                 else if (reg->umax_value <= val)
7379                         return 0;
7380                 break;
7381         case BPF_JSGT:
7382                 if (reg->smin_value > sval)
7383                         return 1;
7384                 else if (reg->smax_value <= sval)
7385                         return 0;
7386                 break;
7387         case BPF_JLT:
7388                 if (reg->umax_value < val)
7389                         return 1;
7390                 else if (reg->umin_value >= val)
7391                         return 0;
7392                 break;
7393         case BPF_JSLT:
7394                 if (reg->smax_value < sval)
7395                         return 1;
7396                 else if (reg->smin_value >= sval)
7397                         return 0;
7398                 break;
7399         case BPF_JGE:
7400                 if (reg->umin_value >= val)
7401                         return 1;
7402                 else if (reg->umax_value < val)
7403                         return 0;
7404                 break;
7405         case BPF_JSGE:
7406                 if (reg->smin_value >= sval)
7407                         return 1;
7408                 else if (reg->smax_value < sval)
7409                         return 0;
7410                 break;
7411         case BPF_JLE:
7412                 if (reg->umax_value <= val)
7413                         return 1;
7414                 else if (reg->umin_value > val)
7415                         return 0;
7416                 break;
7417         case BPF_JSLE:
7418                 if (reg->smax_value <= sval)
7419                         return 1;
7420                 else if (reg->smin_value > sval)
7421                         return 0;
7422                 break;
7423         }
7424
7425         return -1;
7426 }
7427
7428 /* compute branch direction of the expression "if (reg opcode val) goto target;"
7429  * and return:
7430  *  1 - branch will be taken and "goto target" will be executed
7431  *  0 - branch will not be taken and fall-through to next insn
7432  * -1 - unknown. Example: "if (reg < 5)" is unknown when register value
7433  *      range [0,10]
7434  */
7435 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
7436                            bool is_jmp32)
7437 {
7438         if (__is_pointer_value(false, reg)) {
7439                 if (!reg_type_not_null(reg->type))
7440                         return -1;
7441
7442                 /* If pointer is valid tests against zero will fail so we can
7443                  * use this to direct branch taken.
7444                  */
7445                 if (val != 0)
7446                         return -1;
7447
7448                 switch (opcode) {
7449                 case BPF_JEQ:
7450                         return 0;
7451                 case BPF_JNE:
7452                         return 1;
7453                 default:
7454                         return -1;
7455                 }
7456         }
7457
7458         if (is_jmp32)
7459                 return is_branch32_taken(reg, val, opcode);
7460         return is_branch64_taken(reg, val, opcode);
7461 }
7462
7463 /* Adjusts the register min/max values in the case that the dst_reg is the
7464  * variable register that we are working on, and src_reg is a constant or we're
7465  * simply doing a BPF_K check.
7466  * In JEQ/JNE cases we also adjust the var_off values.
7467  */
7468 static void reg_set_min_max(struct bpf_reg_state *true_reg,
7469                             struct bpf_reg_state *false_reg,
7470                             u64 val, u32 val32,
7471                             u8 opcode, bool is_jmp32)
7472 {
7473         struct tnum false_32off = tnum_subreg(false_reg->var_off);
7474         struct tnum false_64off = false_reg->var_off;
7475         struct tnum true_32off = tnum_subreg(true_reg->var_off);
7476         struct tnum true_64off = true_reg->var_off;
7477         s64 sval = (s64)val;
7478         s32 sval32 = (s32)val32;
7479
7480         /* If the dst_reg is a pointer, we can't learn anything about its
7481          * variable offset from the compare (unless src_reg were a pointer into
7482          * the same object, but we don't bother with that.
7483          * Since false_reg and true_reg have the same type by construction, we
7484          * only need to check one of them for pointerness.
7485          */
7486         if (__is_pointer_value(false, false_reg))
7487                 return;
7488
7489         switch (opcode) {
7490         case BPF_JEQ:
7491         case BPF_JNE:
7492         {
7493                 struct bpf_reg_state *reg =
7494                         opcode == BPF_JEQ ? true_reg : false_reg;
7495
7496                 /* JEQ/JNE comparison doesn't change the register equivalence.
7497                  * r1 = r2;
7498                  * if (r1 == 42) goto label;
7499                  * ...
7500                  * label: // here both r1 and r2 are known to be 42.
7501                  *
7502                  * Hence when marking register as known preserve it's ID.
7503                  */
7504                 if (is_jmp32)
7505                         __mark_reg32_known(reg, val32);
7506                 else
7507                         ___mark_reg_known(reg, val);
7508                 break;
7509         }
7510         case BPF_JSET:
7511                 if (is_jmp32) {
7512                         false_32off = tnum_and(false_32off, tnum_const(~val32));
7513                         if (is_power_of_2(val32))
7514                                 true_32off = tnum_or(true_32off,
7515                                                      tnum_const(val32));
7516                 } else {
7517                         false_64off = tnum_and(false_64off, tnum_const(~val));
7518                         if (is_power_of_2(val))
7519                                 true_64off = tnum_or(true_64off,
7520                                                      tnum_const(val));
7521                 }
7522                 break;
7523         case BPF_JGE:
7524         case BPF_JGT:
7525         {
7526                 if (is_jmp32) {
7527                         u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
7528                         u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
7529
7530                         false_reg->u32_max_value = min(false_reg->u32_max_value,
7531                                                        false_umax);
7532                         true_reg->u32_min_value = max(true_reg->u32_min_value,
7533                                                       true_umin);
7534                 } else {
7535                         u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
7536                         u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
7537
7538                         false_reg->umax_value = min(false_reg->umax_value, false_umax);
7539                         true_reg->umin_value = max(true_reg->umin_value, true_umin);
7540                 }
7541                 break;
7542         }
7543         case BPF_JSGE:
7544         case BPF_JSGT:
7545         {
7546                 if (is_jmp32) {
7547                         s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
7548                         s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
7549
7550                         false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
7551                         true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
7552                 } else {
7553                         s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
7554                         s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
7555
7556                         false_reg->smax_value = min(false_reg->smax_value, false_smax);
7557                         true_reg->smin_value = max(true_reg->smin_value, true_smin);
7558                 }
7559                 break;
7560         }
7561         case BPF_JLE:
7562         case BPF_JLT:
7563         {
7564                 if (is_jmp32) {
7565                         u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
7566                         u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
7567
7568                         false_reg->u32_min_value = max(false_reg->u32_min_value,
7569                                                        false_umin);
7570                         true_reg->u32_max_value = min(true_reg->u32_max_value,
7571                                                       true_umax);
7572                 } else {
7573                         u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
7574                         u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
7575
7576                         false_reg->umin_value = max(false_reg->umin_value, false_umin);
7577                         true_reg->umax_value = min(true_reg->umax_value, true_umax);
7578                 }
7579                 break;
7580         }
7581         case BPF_JSLE:
7582         case BPF_JSLT:
7583         {
7584                 if (is_jmp32) {
7585                         s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
7586                         s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
7587
7588                         false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
7589                         true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
7590                 } else {
7591                         s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
7592                         s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
7593
7594                         false_reg->smin_value = max(false_reg->smin_value, false_smin);
7595                         true_reg->smax_value = min(true_reg->smax_value, true_smax);
7596                 }
7597                 break;
7598         }
7599         default:
7600                 return;
7601         }
7602
7603         if (is_jmp32) {
7604                 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
7605                                              tnum_subreg(false_32off));
7606                 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
7607                                             tnum_subreg(true_32off));
7608                 __reg_combine_32_into_64(false_reg);
7609                 __reg_combine_32_into_64(true_reg);
7610         } else {
7611                 false_reg->var_off = false_64off;
7612                 true_reg->var_off = true_64off;
7613                 __reg_combine_64_into_32(false_reg);
7614                 __reg_combine_64_into_32(true_reg);
7615         }
7616 }
7617
7618 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
7619  * the variable reg.
7620  */
7621 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
7622                                 struct bpf_reg_state *false_reg,
7623                                 u64 val, u32 val32,
7624                                 u8 opcode, bool is_jmp32)
7625 {
7626         /* How can we transform "a <op> b" into "b <op> a"? */
7627         static const u8 opcode_flip[16] = {
7628                 /* these stay the same */
7629                 [BPF_JEQ  >> 4] = BPF_JEQ,
7630                 [BPF_JNE  >> 4] = BPF_JNE,
7631                 [BPF_JSET >> 4] = BPF_JSET,
7632                 /* these swap "lesser" and "greater" (L and G in the opcodes) */
7633                 [BPF_JGE  >> 4] = BPF_JLE,
7634                 [BPF_JGT  >> 4] = BPF_JLT,
7635                 [BPF_JLE  >> 4] = BPF_JGE,
7636                 [BPF_JLT  >> 4] = BPF_JGT,
7637                 [BPF_JSGE >> 4] = BPF_JSLE,
7638                 [BPF_JSGT >> 4] = BPF_JSLT,
7639                 [BPF_JSLE >> 4] = BPF_JSGE,
7640                 [BPF_JSLT >> 4] = BPF_JSGT
7641         };
7642         opcode = opcode_flip[opcode >> 4];
7643         /* This uses zero as "not present in table"; luckily the zero opcode,
7644          * BPF_JA, can't get here.
7645          */
7646         if (opcode)
7647                 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
7648 }
7649
7650 /* Regs are known to be equal, so intersect their min/max/var_off */
7651 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
7652                                   struct bpf_reg_state *dst_reg)
7653 {
7654         src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
7655                                                         dst_reg->umin_value);
7656         src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
7657                                                         dst_reg->umax_value);
7658         src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
7659                                                         dst_reg->smin_value);
7660         src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
7661                                                         dst_reg->smax_value);
7662         src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
7663                                                              dst_reg->var_off);
7664         /* We might have learned new bounds from the var_off. */
7665         __update_reg_bounds(src_reg);
7666         __update_reg_bounds(dst_reg);
7667         /* We might have learned something about the sign bit. */
7668         __reg_deduce_bounds(src_reg);
7669         __reg_deduce_bounds(dst_reg);
7670         /* We might have learned some bits from the bounds. */
7671         __reg_bound_offset(src_reg);
7672         __reg_bound_offset(dst_reg);
7673         /* Intersecting with the old var_off might have improved our bounds
7674          * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
7675          * then new var_off is (0; 0x7f...fc) which improves our umax.
7676          */
7677         __update_reg_bounds(src_reg);
7678         __update_reg_bounds(dst_reg);
7679 }
7680
7681 static void reg_combine_min_max(struct bpf_reg_state *true_src,
7682                                 struct bpf_reg_state *true_dst,
7683                                 struct bpf_reg_state *false_src,
7684                                 struct bpf_reg_state *false_dst,
7685                                 u8 opcode)
7686 {
7687         switch (opcode) {
7688         case BPF_JEQ:
7689                 __reg_combine_min_max(true_src, true_dst);
7690                 break;
7691         case BPF_JNE:
7692                 __reg_combine_min_max(false_src, false_dst);
7693                 break;
7694         }
7695 }
7696
7697 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
7698                                  struct bpf_reg_state *reg, u32 id,
7699                                  bool is_null)
7700 {
7701         if (reg_type_may_be_null(reg->type) && reg->id == id &&
7702             !WARN_ON_ONCE(!reg->id)) {
7703                 /* Old offset (both fixed and variable parts) should
7704                  * have been known-zero, because we don't allow pointer
7705                  * arithmetic on pointers that might be NULL.
7706                  */
7707                 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
7708                                  !tnum_equals_const(reg->var_off, 0) ||
7709                                  reg->off)) {
7710                         __mark_reg_known_zero(reg);
7711                         reg->off = 0;
7712                 }
7713                 if (is_null) {
7714                         reg->type = SCALAR_VALUE;
7715                 } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
7716                         const struct bpf_map *map = reg->map_ptr;
7717
7718                         if (map->inner_map_meta) {
7719                                 reg->type = CONST_PTR_TO_MAP;
7720                                 reg->map_ptr = map->inner_map_meta;
7721                         } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
7722                                 reg->type = PTR_TO_XDP_SOCK;
7723                         } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
7724                                    map->map_type == BPF_MAP_TYPE_SOCKHASH) {
7725                                 reg->type = PTR_TO_SOCKET;
7726                         } else {
7727                                 reg->type = PTR_TO_MAP_VALUE;
7728                         }
7729                 } else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
7730                         reg->type = PTR_TO_SOCKET;
7731                 } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
7732                         reg->type = PTR_TO_SOCK_COMMON;
7733                 } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
7734                         reg->type = PTR_TO_TCP_SOCK;
7735                 } else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
7736                         reg->type = PTR_TO_BTF_ID;
7737                 } else if (reg->type == PTR_TO_MEM_OR_NULL) {
7738                         reg->type = PTR_TO_MEM;
7739                 } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
7740                         reg->type = PTR_TO_RDONLY_BUF;
7741                 } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) {
7742                         reg->type = PTR_TO_RDWR_BUF;
7743                 }
7744                 if (is_null) {
7745                         /* We don't need id and ref_obj_id from this point
7746                          * onwards anymore, thus we should better reset it,
7747                          * so that state pruning has chances to take effect.
7748                          */
7749                         reg->id = 0;
7750                         reg->ref_obj_id = 0;
7751                 } else if (!reg_may_point_to_spin_lock(reg)) {
7752                         /* For not-NULL ptr, reg->ref_obj_id will be reset
7753                          * in release_reg_references().
7754                          *
7755                          * reg->id is still used by spin_lock ptr. Other
7756                          * than spin_lock ptr type, reg->id can be reset.
7757                          */
7758                         reg->id = 0;
7759                 }
7760         }
7761 }
7762
7763 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id,
7764                                     bool is_null)
7765 {
7766         struct bpf_reg_state *reg;
7767         int i;
7768
7769         for (i = 0; i < MAX_BPF_REG; i++)
7770                 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
7771
7772         bpf_for_each_spilled_reg(i, state, reg) {
7773                 if (!reg)
7774                         continue;
7775                 mark_ptr_or_null_reg(state, reg, id, is_null);
7776         }
7777 }
7778
7779 /* The logic is similar to find_good_pkt_pointers(), both could eventually
7780  * be folded together at some point.
7781  */
7782 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
7783                                   bool is_null)
7784 {
7785         struct bpf_func_state *state = vstate->frame[vstate->curframe];
7786         struct bpf_reg_state *regs = state->regs;
7787         u32 ref_obj_id = regs[regno].ref_obj_id;
7788         u32 id = regs[regno].id;
7789         int i;
7790
7791         if (ref_obj_id && ref_obj_id == id && is_null)
7792                 /* regs[regno] is in the " == NULL" branch.
7793                  * No one could have freed the reference state before
7794                  * doing the NULL check.
7795                  */
7796                 WARN_ON_ONCE(release_reference_state(state, id));
7797
7798         for (i = 0; i <= vstate->curframe; i++)
7799                 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
7800 }
7801
7802 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
7803                                    struct bpf_reg_state *dst_reg,
7804                                    struct bpf_reg_state *src_reg,
7805                                    struct bpf_verifier_state *this_branch,
7806                                    struct bpf_verifier_state *other_branch)
7807 {
7808         if (BPF_SRC(insn->code) != BPF_X)
7809                 return false;
7810
7811         /* Pointers are always 64-bit. */
7812         if (BPF_CLASS(insn->code) == BPF_JMP32)
7813                 return false;
7814
7815         switch (BPF_OP(insn->code)) {
7816         case BPF_JGT:
7817                 if ((dst_reg->type == PTR_TO_PACKET &&
7818                      src_reg->type == PTR_TO_PACKET_END) ||
7819                     (dst_reg->type == PTR_TO_PACKET_META &&
7820                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7821                         /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
7822                         find_good_pkt_pointers(this_branch, dst_reg,
7823                                                dst_reg->type, false);
7824                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
7825                             src_reg->type == PTR_TO_PACKET) ||
7826                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7827                             src_reg->type == PTR_TO_PACKET_META)) {
7828                         /* pkt_end > pkt_data', pkt_data > pkt_meta' */
7829                         find_good_pkt_pointers(other_branch, src_reg,
7830                                                src_reg->type, true);
7831                 } else {
7832                         return false;
7833                 }
7834                 break;
7835         case BPF_JLT:
7836                 if ((dst_reg->type == PTR_TO_PACKET &&
7837                      src_reg->type == PTR_TO_PACKET_END) ||
7838                     (dst_reg->type == PTR_TO_PACKET_META &&
7839                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7840                         /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
7841                         find_good_pkt_pointers(other_branch, dst_reg,
7842                                                dst_reg->type, true);
7843                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
7844                             src_reg->type == PTR_TO_PACKET) ||
7845                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7846                             src_reg->type == PTR_TO_PACKET_META)) {
7847                         /* pkt_end < pkt_data', pkt_data > pkt_meta' */
7848                         find_good_pkt_pointers(this_branch, src_reg,
7849                                                src_reg->type, false);
7850                 } else {
7851                         return false;
7852                 }
7853                 break;
7854         case BPF_JGE:
7855                 if ((dst_reg->type == PTR_TO_PACKET &&
7856                      src_reg->type == PTR_TO_PACKET_END) ||
7857                     (dst_reg->type == PTR_TO_PACKET_META &&
7858                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7859                         /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
7860                         find_good_pkt_pointers(this_branch, dst_reg,
7861                                                dst_reg->type, true);
7862                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
7863                             src_reg->type == PTR_TO_PACKET) ||
7864                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7865                             src_reg->type == PTR_TO_PACKET_META)) {
7866                         /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
7867                         find_good_pkt_pointers(other_branch, src_reg,
7868                                                src_reg->type, false);
7869                 } else {
7870                         return false;
7871                 }
7872                 break;
7873         case BPF_JLE:
7874                 if ((dst_reg->type == PTR_TO_PACKET &&
7875                      src_reg->type == PTR_TO_PACKET_END) ||
7876                     (dst_reg->type == PTR_TO_PACKET_META &&
7877                      reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7878                         /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
7879                         find_good_pkt_pointers(other_branch, dst_reg,
7880                                                dst_reg->type, false);
7881                 } else if ((dst_reg->type == PTR_TO_PACKET_END &&
7882                             src_reg->type == PTR_TO_PACKET) ||
7883                            (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7884                             src_reg->type == PTR_TO_PACKET_META)) {
7885                         /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
7886                         find_good_pkt_pointers(this_branch, src_reg,
7887                                                src_reg->type, true);
7888                 } else {
7889                         return false;
7890                 }
7891                 break;
7892         default:
7893                 return false;
7894         }
7895
7896         return true;
7897 }
7898
7899 static void find_equal_scalars(struct bpf_verifier_state *vstate,
7900                                struct bpf_reg_state *known_reg)
7901 {
7902         struct bpf_func_state *state;
7903         struct bpf_reg_state *reg;
7904         int i, j;
7905
7906         for (i = 0; i <= vstate->curframe; i++) {
7907                 state = vstate->frame[i];
7908                 for (j = 0; j < MAX_BPF_REG; j++) {
7909                         reg = &state->regs[j];
7910                         if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
7911                                 *reg = *known_reg;
7912                 }
7913
7914                 bpf_for_each_spilled_reg(j, state, reg) {
7915                         if (!reg)
7916                                 continue;
7917                         if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
7918                                 *reg = *known_reg;
7919                 }
7920         }
7921 }
7922
7923 static int check_cond_jmp_op(struct bpf_verifier_env *env,
7924                              struct bpf_insn *insn, int *insn_idx)
7925 {
7926         struct bpf_verifier_state *this_branch = env->cur_state;
7927         struct bpf_verifier_state *other_branch;
7928         struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
7929         struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
7930         u8 opcode = BPF_OP(insn->code);
7931         bool is_jmp32;
7932         int pred = -1;
7933         int err;
7934
7935         /* Only conditional jumps are expected to reach here. */
7936         if (opcode == BPF_JA || opcode > BPF_JSLE) {
7937                 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
7938                 return -EINVAL;
7939         }
7940
7941         if (BPF_SRC(insn->code) == BPF_X) {
7942                 if (insn->imm != 0) {
7943                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7944                         return -EINVAL;
7945                 }
7946
7947                 /* check src1 operand */
7948                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
7949                 if (err)
7950                         return err;
7951
7952                 if (is_pointer_value(env, insn->src_reg)) {
7953                         verbose(env, "R%d pointer comparison prohibited\n",
7954                                 insn->src_reg);
7955                         return -EACCES;
7956                 }
7957                 src_reg = &regs[insn->src_reg];
7958         } else {
7959                 if (insn->src_reg != BPF_REG_0) {
7960                         verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7961                         return -EINVAL;
7962                 }
7963         }
7964
7965         /* check src2 operand */
7966         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7967         if (err)
7968                 return err;
7969
7970         dst_reg = &regs[insn->dst_reg];
7971         is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
7972
7973         if (BPF_SRC(insn->code) == BPF_K) {
7974                 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
7975         } else if (src_reg->type == SCALAR_VALUE &&
7976                    is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
7977                 pred = is_branch_taken(dst_reg,
7978                                        tnum_subreg(src_reg->var_off).value,
7979                                        opcode,
7980                                        is_jmp32);
7981         } else if (src_reg->type == SCALAR_VALUE &&
7982                    !is_jmp32 && tnum_is_const(src_reg->var_off)) {
7983                 pred = is_branch_taken(dst_reg,
7984                                        src_reg->var_off.value,
7985                                        opcode,
7986                                        is_jmp32);
7987         }
7988
7989         if (pred >= 0) {
7990                 /* If we get here with a dst_reg pointer type it is because
7991                  * above is_branch_taken() special cased the 0 comparison.
7992                  */
7993                 if (!__is_pointer_value(false, dst_reg))
7994                         err = mark_chain_precision(env, insn->dst_reg);
7995                 if (BPF_SRC(insn->code) == BPF_X && !err)
7996                         err = mark_chain_precision(env, insn->src_reg);
7997                 if (err)
7998                         return err;
7999         }
8000
8001         if (pred == 1) {
8002                 /* Only follow the goto, ignore fall-through. If needed, push
8003                  * the fall-through branch for simulation under speculative
8004                  * execution.
8005                  */
8006                 if (!env->bypass_spec_v1 &&
8007                     !sanitize_speculative_path(env, insn, *insn_idx + 1,
8008                                                *insn_idx))
8009                         return -EFAULT;
8010                 *insn_idx += insn->off;
8011                 return 0;
8012         } else if (pred == 0) {
8013                 /* Only follow the fall-through branch, since that's where the
8014                  * program will go. If needed, push the goto branch for
8015                  * simulation under speculative execution.
8016                  */
8017                 if (!env->bypass_spec_v1 &&
8018                     !sanitize_speculative_path(env, insn,
8019                                                *insn_idx + insn->off + 1,
8020                                                *insn_idx))
8021                         return -EFAULT;
8022                 return 0;
8023         }
8024
8025         other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
8026                                   false);
8027         if (!other_branch)
8028                 return -EFAULT;
8029         other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
8030
8031         /* detect if we are comparing against a constant value so we can adjust
8032          * our min/max values for our dst register.
8033          * this is only legit if both are scalars (or pointers to the same
8034          * object, I suppose, but we don't support that right now), because
8035          * otherwise the different base pointers mean the offsets aren't
8036          * comparable.
8037          */
8038         if (BPF_SRC(insn->code) == BPF_X) {
8039                 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
8040
8041                 if (dst_reg->type == SCALAR_VALUE &&
8042                     src_reg->type == SCALAR_VALUE) {
8043                         if (tnum_is_const(src_reg->var_off) ||
8044                             (is_jmp32 &&
8045                              tnum_is_const(tnum_subreg(src_reg->var_off))))
8046                                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
8047                                                 dst_reg,
8048                                                 src_reg->var_off.value,
8049                                                 tnum_subreg(src_reg->var_off).value,
8050                                                 opcode, is_jmp32);
8051                         else if (tnum_is_const(dst_reg->var_off) ||
8052                                  (is_jmp32 &&
8053                                   tnum_is_const(tnum_subreg(dst_reg->var_off))))
8054                                 reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
8055                                                     src_reg,
8056                                                     dst_reg->var_off.value,
8057                                                     tnum_subreg(dst_reg->var_off).value,
8058                                                     opcode, is_jmp32);
8059                         else if (!is_jmp32 &&
8060                                  (opcode == BPF_JEQ || opcode == BPF_JNE))
8061                                 /* Comparing for equality, we can combine knowledge */
8062                                 reg_combine_min_max(&other_branch_regs[insn->src_reg],
8063                                                     &other_branch_regs[insn->dst_reg],
8064                                                     src_reg, dst_reg, opcode);
8065                         if (src_reg->id &&
8066                             !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
8067                                 find_equal_scalars(this_branch, src_reg);
8068                                 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
8069                         }
8070
8071                 }
8072         } else if (dst_reg->type == SCALAR_VALUE) {
8073                 reg_set_min_max(&other_branch_regs[insn->dst_reg],
8074                                         dst_reg, insn->imm, (u32)insn->imm,
8075                                         opcode, is_jmp32);
8076         }
8077
8078         if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
8079             !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
8080                 find_equal_scalars(this_branch, dst_reg);
8081                 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
8082         }
8083
8084         /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
8085          * NOTE: these optimizations below are related with pointer comparison
8086          *       which will never be JMP32.
8087          */
8088         if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
8089             insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
8090             reg_type_may_be_null(dst_reg->type)) {
8091                 /* Mark all identical registers in each branch as either
8092                  * safe or unknown depending R == 0 or R != 0 conditional.
8093                  */
8094                 mark_ptr_or_null_regs(this_branch, insn->dst_reg,
8095                                       opcode == BPF_JNE);
8096                 mark_ptr_or_null_regs(other_branch, insn->dst_reg,
8097                                       opcode == BPF_JEQ);
8098         } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
8099                                            this_branch, other_branch) &&
8100                    is_pointer_value(env, insn->dst_reg)) {
8101                 verbose(env, "R%d pointer comparison prohibited\n",
8102                         insn->dst_reg);
8103                 return -EACCES;
8104         }
8105         if (env->log.level & BPF_LOG_LEVEL)
8106                 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
8107         return 0;
8108 }
8109
8110 /* verify BPF_LD_IMM64 instruction */
8111 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
8112 {
8113         struct bpf_insn_aux_data *aux = cur_aux(env);
8114         struct bpf_reg_state *regs = cur_regs(env);
8115         struct bpf_reg_state *dst_reg;
8116         struct bpf_map *map;
8117         int err;
8118
8119         if (BPF_SIZE(insn->code) != BPF_DW) {
8120                 verbose(env, "invalid BPF_LD_IMM insn\n");
8121                 return -EINVAL;
8122         }
8123         if (insn->off != 0) {
8124                 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
8125                 return -EINVAL;
8126         }
8127
8128         err = check_reg_arg(env, insn->dst_reg, DST_OP);
8129         if (err)
8130                 return err;
8131
8132         dst_reg = &regs[insn->dst_reg];
8133         if (insn->src_reg == 0) {
8134                 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
8135
8136                 dst_reg->type = SCALAR_VALUE;
8137                 __mark_reg_known(&regs[insn->dst_reg], imm);
8138                 return 0;
8139         }
8140
8141         if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
8142                 mark_reg_known_zero(env, regs, insn->dst_reg);
8143
8144                 dst_reg->type = aux->btf_var.reg_type;
8145                 switch (dst_reg->type) {
8146                 case PTR_TO_MEM:
8147                         dst_reg->mem_size = aux->btf_var.mem_size;
8148                         break;
8149                 case PTR_TO_BTF_ID:
8150                 case PTR_TO_PERCPU_BTF_ID:
8151                         dst_reg->btf_id = aux->btf_var.btf_id;
8152                         break;
8153                 default:
8154                         verbose(env, "bpf verifier is misconfigured\n");
8155                         return -EFAULT;
8156                 }
8157                 return 0;
8158         }
8159
8160         map = env->used_maps[aux->map_index];
8161         mark_reg_known_zero(env, regs, insn->dst_reg);
8162         dst_reg->map_ptr = map;
8163
8164         if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
8165                 dst_reg->type = PTR_TO_MAP_VALUE;
8166                 dst_reg->off = aux->map_off;
8167                 if (map_value_has_spin_lock(map))
8168                         dst_reg->id = ++env->id_gen;
8169         } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
8170                 dst_reg->type = CONST_PTR_TO_MAP;
8171         } else {
8172                 verbose(env, "bpf verifier is misconfigured\n");
8173                 return -EINVAL;
8174         }
8175
8176         return 0;
8177 }
8178
8179 static bool may_access_skb(enum bpf_prog_type type)
8180 {
8181         switch (type) {
8182         case BPF_PROG_TYPE_SOCKET_FILTER:
8183         case BPF_PROG_TYPE_SCHED_CLS:
8184         case BPF_PROG_TYPE_SCHED_ACT:
8185                 return true;
8186         default:
8187                 return false;
8188         }
8189 }
8190
8191 /* verify safety of LD_ABS|LD_IND instructions:
8192  * - they can only appear in the programs where ctx == skb
8193  * - since they are wrappers of function calls, they scratch R1-R5 registers,
8194  *   preserve R6-R9, and store return value into R0
8195  *
8196  * Implicit input:
8197  *   ctx == skb == R6 == CTX
8198  *
8199  * Explicit input:
8200  *   SRC == any register
8201  *   IMM == 32-bit immediate
8202  *
8203  * Output:
8204  *   R0 - 8/16/32-bit skb data converted to cpu endianness
8205  */
8206 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
8207 {
8208         struct bpf_reg_state *regs = cur_regs(env);
8209         static const int ctx_reg = BPF_REG_6;
8210         u8 mode = BPF_MODE(insn->code);
8211         int i, err;
8212
8213         if (!may_access_skb(resolve_prog_type(env->prog))) {
8214                 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
8215                 return -EINVAL;
8216         }
8217
8218         if (!env->ops->gen_ld_abs) {
8219                 verbose(env, "bpf verifier is misconfigured\n");
8220                 return -EINVAL;
8221         }
8222
8223         if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
8224             BPF_SIZE(insn->code) == BPF_DW ||
8225             (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
8226                 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
8227                 return -EINVAL;
8228         }
8229
8230         /* check whether implicit source operand (register R6) is readable */
8231         err = check_reg_arg(env, ctx_reg, SRC_OP);
8232         if (err)
8233                 return err;
8234
8235         /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
8236          * gen_ld_abs() may terminate the program at runtime, leading to
8237          * reference leak.
8238          */
8239         err = check_reference_leak(env);
8240         if (err) {
8241                 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
8242                 return err;
8243         }
8244
8245         if (env->cur_state->active_spin_lock) {
8246                 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
8247                 return -EINVAL;
8248         }
8249
8250         if (regs[ctx_reg].type != PTR_TO_CTX) {
8251                 verbose(env,
8252                         "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
8253                 return -EINVAL;
8254         }
8255
8256         if (mode == BPF_IND) {
8257                 /* check explicit source operand */
8258                 err = check_reg_arg(env, insn->src_reg, SRC_OP);
8259                 if (err)
8260                         return err;
8261         }
8262
8263         err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
8264         if (err < 0)
8265                 return err;
8266
8267         /* reset caller saved regs to unreadable */
8268         for (i = 0; i < CALLER_SAVED_REGS; i++) {
8269                 mark_reg_not_init(env, regs, caller_saved[i]);
8270                 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8271         }
8272
8273         /* mark destination R0 register as readable, since it contains
8274          * the value fetched from the packet.
8275          * Already marked as written above.
8276          */
8277         mark_reg_unknown(env, regs, BPF_REG_0);
8278         /* ld_abs load up to 32-bit skb data. */
8279         regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
8280         return 0;
8281 }
8282
8283 static int check_return_code(struct bpf_verifier_env *env)
8284 {
8285         struct tnum enforce_attach_type_range = tnum_unknown;
8286         const struct bpf_prog *prog = env->prog;
8287         struct bpf_reg_state *reg;
8288         struct tnum range = tnum_range(0, 1);
8289         enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
8290         int err;
8291         const bool is_subprog = env->cur_state->frame[0]->subprogno;
8292
8293         /* LSM and struct_ops func-ptr's return type could be "void" */
8294         if (!is_subprog &&
8295             (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
8296              prog_type == BPF_PROG_TYPE_LSM) &&
8297             !prog->aux->attach_func_proto->type)
8298                 return 0;
8299
8300         /* eBPF calling convetion is such that R0 is used
8301          * to return the value from eBPF program.
8302          * Make sure that it's readable at this time
8303          * of bpf_exit, which means that program wrote
8304          * something into it earlier
8305          */
8306         err = check_reg_arg(env, BPF_REG_0, SRC_OP);
8307         if (err)
8308                 return err;
8309
8310         if (is_pointer_value(env, BPF_REG_0)) {
8311                 verbose(env, "R0 leaks addr as return value\n");
8312                 return -EACCES;
8313         }
8314
8315         reg = cur_regs(env) + BPF_REG_0;
8316         if (is_subprog) {
8317                 if (reg->type != SCALAR_VALUE) {
8318                         verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
8319                                 reg_type_str[reg->type]);
8320                         return -EINVAL;
8321                 }
8322                 return 0;
8323         }
8324
8325         switch (prog_type) {
8326         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
8327                 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
8328                     env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
8329                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
8330                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
8331                     env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
8332                     env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
8333                         range = tnum_range(1, 1);
8334                 break;
8335         case BPF_PROG_TYPE_CGROUP_SKB:
8336                 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
8337                         range = tnum_range(0, 3);
8338                         enforce_attach_type_range = tnum_range(2, 3);
8339                 }
8340                 break;
8341         case BPF_PROG_TYPE_CGROUP_SOCK:
8342         case BPF_PROG_TYPE_SOCK_OPS:
8343         case BPF_PROG_TYPE_CGROUP_DEVICE:
8344         case BPF_PROG_TYPE_CGROUP_SYSCTL:
8345         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
8346                 break;
8347         case BPF_PROG_TYPE_RAW_TRACEPOINT:
8348                 if (!env->prog->aux->attach_btf_id)
8349                         return 0;
8350                 range = tnum_const(0);
8351                 break;
8352         case BPF_PROG_TYPE_TRACING:
8353                 switch (env->prog->expected_attach_type) {
8354                 case BPF_TRACE_FENTRY:
8355                 case BPF_TRACE_FEXIT:
8356                         range = tnum_const(0);
8357                         break;
8358                 case BPF_TRACE_RAW_TP:
8359                 case BPF_MODIFY_RETURN:
8360                         return 0;
8361                 case BPF_TRACE_ITER:
8362                         break;
8363                 default:
8364                         return -ENOTSUPP;
8365                 }
8366                 break;
8367         case BPF_PROG_TYPE_SK_LOOKUP:
8368                 range = tnum_range(SK_DROP, SK_PASS);
8369                 break;
8370         case BPF_PROG_TYPE_EXT:
8371                 /* freplace program can return anything as its return value
8372                  * depends on the to-be-replaced kernel func or bpf program.
8373                  */
8374         default:
8375                 return 0;
8376         }
8377
8378         if (reg->type != SCALAR_VALUE) {
8379                 verbose(env, "At program exit the register R0 is not a known value (%s)\n",
8380                         reg_type_str[reg->type]);
8381                 return -EINVAL;
8382         }
8383
8384         if (!tnum_in(range, reg->var_off)) {
8385                 char tn_buf[48];
8386
8387                 verbose(env, "At program exit the register R0 ");
8388                 if (!tnum_is_unknown(reg->var_off)) {
8389                         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
8390                         verbose(env, "has value %s", tn_buf);
8391                 } else {
8392                         verbose(env, "has unknown scalar value");
8393                 }
8394                 tnum_strn(tn_buf, sizeof(tn_buf), range);
8395                 verbose(env, " should have been in %s\n", tn_buf);
8396                 return -EINVAL;
8397         }
8398
8399         if (!tnum_is_unknown(enforce_attach_type_range) &&
8400             tnum_in(enforce_attach_type_range, reg->var_off))
8401                 env->prog->enforce_expected_attach_type = 1;
8402         return 0;
8403 }
8404
8405 /* non-recursive DFS pseudo code
8406  * 1  procedure DFS-iterative(G,v):
8407  * 2      label v as discovered
8408  * 3      let S be a stack
8409  * 4      S.push(v)
8410  * 5      while S is not empty
8411  * 6            t <- S.pop()
8412  * 7            if t is what we're looking for:
8413  * 8                return t
8414  * 9            for all edges e in G.adjacentEdges(t) do
8415  * 10               if edge e is already labelled
8416  * 11                   continue with the next edge
8417  * 12               w <- G.adjacentVertex(t,e)
8418  * 13               if vertex w is not discovered and not explored
8419  * 14                   label e as tree-edge
8420  * 15                   label w as discovered
8421  * 16                   S.push(w)
8422  * 17                   continue at 5
8423  * 18               else if vertex w is discovered
8424  * 19                   label e as back-edge
8425  * 20               else
8426  * 21                   // vertex w is explored
8427  * 22                   label e as forward- or cross-edge
8428  * 23           label t as explored
8429  * 24           S.pop()
8430  *
8431  * convention:
8432  * 0x10 - discovered
8433  * 0x11 - discovered and fall-through edge labelled
8434  * 0x12 - discovered and fall-through and branch edges labelled
8435  * 0x20 - explored
8436  */
8437
8438 enum {
8439         DISCOVERED = 0x10,
8440         EXPLORED = 0x20,
8441         FALLTHROUGH = 1,
8442         BRANCH = 2,
8443 };
8444
8445 static u32 state_htab_size(struct bpf_verifier_env *env)
8446 {
8447         return env->prog->len;
8448 }
8449
8450 static struct bpf_verifier_state_list **explored_state(
8451                                         struct bpf_verifier_env *env,
8452                                         int idx)
8453 {
8454         struct bpf_verifier_state *cur = env->cur_state;
8455         struct bpf_func_state *state = cur->frame[cur->curframe];
8456
8457         return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
8458 }
8459
8460 static void init_explored_state(struct bpf_verifier_env *env, int idx)
8461 {
8462         env->insn_aux_data[idx].prune_point = true;
8463 }
8464
8465 /* t, w, e - match pseudo-code above:
8466  * t - index of current instruction
8467  * w - next instruction
8468  * e - edge
8469  */
8470 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
8471                      bool loop_ok)
8472 {
8473         int *insn_stack = env->cfg.insn_stack;
8474         int *insn_state = env->cfg.insn_state;
8475
8476         if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
8477                 return 0;
8478
8479         if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
8480                 return 0;
8481
8482         if (w < 0 || w >= env->prog->len) {
8483                 verbose_linfo(env, t, "%d: ", t);
8484                 verbose(env, "jump out of range from insn %d to %d\n", t, w);
8485                 return -EINVAL;
8486         }
8487
8488         if (e == BRANCH)
8489                 /* mark branch target for state pruning */
8490                 init_explored_state(env, w);
8491
8492         if (insn_state[w] == 0) {
8493                 /* tree-edge */
8494                 insn_state[t] = DISCOVERED | e;
8495                 insn_state[w] = DISCOVERED;
8496                 if (env->cfg.cur_stack >= env->prog->len)
8497                         return -E2BIG;
8498                 insn_stack[env->cfg.cur_stack++] = w;
8499                 return 1;
8500         } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
8501                 if (loop_ok && env->bpf_capable)
8502                         return 0;
8503                 verbose_linfo(env, t, "%d: ", t);
8504                 verbose_linfo(env, w, "%d: ", w);
8505                 verbose(env, "back-edge from insn %d to %d\n", t, w);
8506                 return -EINVAL;
8507         } else if (insn_state[w] == EXPLORED) {
8508                 /* forward- or cross-edge */
8509                 insn_state[t] = DISCOVERED | e;
8510         } else {
8511                 verbose(env, "insn state internal bug\n");
8512                 return -EFAULT;
8513         }
8514         return 0;
8515 }
8516
8517 /* non-recursive depth-first-search to detect loops in BPF program
8518  * loop == back-edge in directed graph
8519  */
8520 static int check_cfg(struct bpf_verifier_env *env)
8521 {
8522         struct bpf_insn *insns = env->prog->insnsi;
8523         int insn_cnt = env->prog->len;
8524         int *insn_stack, *insn_state;
8525         int ret = 0;
8526         int i, t;
8527
8528         insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8529         if (!insn_state)
8530                 return -ENOMEM;
8531
8532         insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8533         if (!insn_stack) {
8534                 kvfree(insn_state);
8535                 return -ENOMEM;
8536         }
8537
8538         insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
8539         insn_stack[0] = 0; /* 0 is the first instruction */
8540         env->cfg.cur_stack = 1;
8541
8542 peek_stack:
8543         if (env->cfg.cur_stack == 0)
8544                 goto check_state;
8545         t = insn_stack[env->cfg.cur_stack - 1];
8546
8547         if (BPF_CLASS(insns[t].code) == BPF_JMP ||
8548             BPF_CLASS(insns[t].code) == BPF_JMP32) {
8549                 u8 opcode = BPF_OP(insns[t].code);
8550
8551                 if (opcode == BPF_EXIT) {
8552                         goto mark_explored;
8553                 } else if (opcode == BPF_CALL) {
8554                         ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8555                         if (ret == 1)
8556                                 goto peek_stack;
8557                         else if (ret < 0)
8558                                 goto err_free;
8559                         if (t + 1 < insn_cnt)
8560                                 init_explored_state(env, t + 1);
8561                         if (insns[t].src_reg == BPF_PSEUDO_CALL) {
8562                                 init_explored_state(env, t);
8563                                 ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
8564                                                 env, false);
8565                                 if (ret == 1)
8566                                         goto peek_stack;
8567                                 else if (ret < 0)
8568                                         goto err_free;
8569                         }
8570                 } else if (opcode == BPF_JA) {
8571                         if (BPF_SRC(insns[t].code) != BPF_K) {
8572                                 ret = -EINVAL;
8573                                 goto err_free;
8574                         }
8575                         /* unconditional jump with single edge */
8576                         ret = push_insn(t, t + insns[t].off + 1,
8577                                         FALLTHROUGH, env, true);
8578                         if (ret == 1)
8579                                 goto peek_stack;
8580                         else if (ret < 0)
8581                                 goto err_free;
8582                         /* unconditional jmp is not a good pruning point,
8583                          * but it's marked, since backtracking needs
8584                          * to record jmp history in is_state_visited().
8585                          */
8586                         init_explored_state(env, t + insns[t].off + 1);
8587                         /* tell verifier to check for equivalent states
8588                          * after every call and jump
8589                          */
8590                         if (t + 1 < insn_cnt)
8591                                 init_explored_state(env, t + 1);
8592                 } else {
8593                         /* conditional jump with two edges */
8594                         init_explored_state(env, t);
8595                         ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
8596                         if (ret == 1)
8597                                 goto peek_stack;
8598                         else if (ret < 0)
8599                                 goto err_free;
8600
8601                         ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
8602                         if (ret == 1)
8603                                 goto peek_stack;
8604                         else if (ret < 0)
8605                                 goto err_free;
8606                 }
8607         } else {
8608                 /* all other non-branch instructions with single
8609                  * fall-through edge
8610                  */
8611                 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8612                 if (ret == 1)
8613                         goto peek_stack;
8614                 else if (ret < 0)
8615                         goto err_free;
8616         }
8617
8618 mark_explored:
8619         insn_state[t] = EXPLORED;
8620         if (env->cfg.cur_stack-- <= 0) {
8621                 verbose(env, "pop stack internal bug\n");
8622                 ret = -EFAULT;
8623                 goto err_free;
8624         }
8625         goto peek_stack;
8626
8627 check_state:
8628         for (i = 0; i < insn_cnt; i++) {
8629                 if (insn_state[i] != EXPLORED) {
8630                         verbose(env, "unreachable insn %d\n", i);
8631                         ret = -EINVAL;
8632                         goto err_free;
8633                 }
8634         }
8635         ret = 0; /* cfg looks good */
8636
8637 err_free:
8638         kvfree(insn_state);
8639         kvfree(insn_stack);
8640         env->cfg.insn_state = env->cfg.insn_stack = NULL;
8641         return ret;
8642 }
8643
8644 static int check_abnormal_return(struct bpf_verifier_env *env)
8645 {
8646         int i;
8647
8648         for (i = 1; i < env->subprog_cnt; i++) {
8649                 if (env->subprog_info[i].has_ld_abs) {
8650                         verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
8651                         return -EINVAL;
8652                 }
8653                 if (env->subprog_info[i].has_tail_call) {
8654                         verbose(env, "tail_call is not allowed in subprogs without BTF\n");
8655                         return -EINVAL;
8656                 }
8657         }
8658         return 0;
8659 }
8660
8661 /* The minimum supported BTF func info size */
8662 #define MIN_BPF_FUNCINFO_SIZE   8
8663 #define MAX_FUNCINFO_REC_SIZE   252
8664
8665 static int check_btf_func(struct bpf_verifier_env *env,
8666                           const union bpf_attr *attr,
8667                           union bpf_attr __user *uattr)
8668 {
8669         const struct btf_type *type, *func_proto, *ret_type;
8670         u32 i, nfuncs, urec_size, min_size;
8671         u32 krec_size = sizeof(struct bpf_func_info);
8672         struct bpf_func_info *krecord;
8673         struct bpf_func_info_aux *info_aux = NULL;
8674         struct bpf_prog *prog;
8675         const struct btf *btf;
8676         void __user *urecord;
8677         u32 prev_offset = 0;
8678         bool scalar_return;
8679         int ret = -ENOMEM;
8680
8681         nfuncs = attr->func_info_cnt;
8682         if (!nfuncs) {
8683                 if (check_abnormal_return(env))
8684                         return -EINVAL;
8685                 return 0;
8686         }
8687
8688         if (nfuncs != env->subprog_cnt) {
8689                 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
8690                 return -EINVAL;
8691         }
8692
8693         urec_size = attr->func_info_rec_size;
8694         if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
8695             urec_size > MAX_FUNCINFO_REC_SIZE ||
8696             urec_size % sizeof(u32)) {
8697                 verbose(env, "invalid func info rec size %u\n", urec_size);
8698                 return -EINVAL;
8699         }
8700
8701         prog = env->prog;
8702         btf = prog->aux->btf;
8703
8704         urecord = u64_to_user_ptr(attr->func_info);
8705         min_size = min_t(u32, krec_size, urec_size);
8706
8707         krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
8708         if (!krecord)
8709                 return -ENOMEM;
8710         info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
8711         if (!info_aux)
8712                 goto err_free;
8713
8714         for (i = 0; i < nfuncs; i++) {
8715                 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
8716                 if (ret) {
8717                         if (ret == -E2BIG) {
8718                                 verbose(env, "nonzero tailing record in func info");
8719                                 /* set the size kernel expects so loader can zero
8720                                  * out the rest of the record.
8721                                  */
8722                                 if (put_user(min_size, &uattr->func_info_rec_size))
8723                                         ret = -EFAULT;
8724                         }
8725                         goto err_free;
8726                 }
8727
8728                 if (copy_from_user(&krecord[i], urecord, min_size)) {
8729                         ret = -EFAULT;
8730                         goto err_free;
8731                 }
8732
8733                 /* check insn_off */
8734                 ret = -EINVAL;
8735                 if (i == 0) {
8736                         if (krecord[i].insn_off) {
8737                                 verbose(env,
8738                                         "nonzero insn_off %u for the first func info record",
8739                                         krecord[i].insn_off);
8740                                 goto err_free;
8741                         }
8742                 } else if (krecord[i].insn_off <= prev_offset) {
8743                         verbose(env,
8744                                 "same or smaller insn offset (%u) than previous func info record (%u)",
8745                                 krecord[i].insn_off, prev_offset);
8746                         goto err_free;
8747                 }
8748
8749                 if (env->subprog_info[i].start != krecord[i].insn_off) {
8750                         verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
8751                         goto err_free;
8752                 }
8753
8754                 /* check type_id */
8755                 type = btf_type_by_id(btf, krecord[i].type_id);
8756                 if (!type || !btf_type_is_func(type)) {
8757                         verbose(env, "invalid type id %d in func info",
8758                                 krecord[i].type_id);
8759                         goto err_free;
8760                 }
8761                 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
8762
8763                 func_proto = btf_type_by_id(btf, type->type);
8764                 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
8765                         /* btf_func_check() already verified it during BTF load */
8766                         goto err_free;
8767                 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
8768                 scalar_return =
8769                         btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
8770                 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
8771                         verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
8772                         goto err_free;
8773                 }
8774                 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
8775                         verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
8776                         goto err_free;
8777                 }
8778
8779                 prev_offset = krecord[i].insn_off;
8780                 urecord += urec_size;
8781         }
8782
8783         prog->aux->func_info = krecord;
8784         prog->aux->func_info_cnt = nfuncs;
8785         prog->aux->func_info_aux = info_aux;
8786         return 0;
8787
8788 err_free:
8789         kvfree(krecord);
8790         kfree(info_aux);
8791         return ret;
8792 }
8793
8794 static void adjust_btf_func(struct bpf_verifier_env *env)
8795 {
8796         struct bpf_prog_aux *aux = env->prog->aux;
8797         int i;
8798
8799         if (!aux->func_info)
8800                 return;
8801
8802         for (i = 0; i < env->subprog_cnt; i++)
8803                 aux->func_info[i].insn_off = env->subprog_info[i].start;
8804 }
8805
8806 #define MIN_BPF_LINEINFO_SIZE   (offsetof(struct bpf_line_info, line_col) + \
8807                 sizeof(((struct bpf_line_info *)(0))->line_col))
8808 #define MAX_LINEINFO_REC_SIZE   MAX_FUNCINFO_REC_SIZE
8809
8810 static int check_btf_line(struct bpf_verifier_env *env,
8811                           const union bpf_attr *attr,
8812                           union bpf_attr __user *uattr)
8813 {
8814         u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
8815         struct bpf_subprog_info *sub;
8816         struct bpf_line_info *linfo;
8817         struct bpf_prog *prog;
8818         const struct btf *btf;
8819         void __user *ulinfo;
8820         int err;
8821
8822         nr_linfo = attr->line_info_cnt;
8823         if (!nr_linfo)
8824                 return 0;
8825         if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
8826                 return -EINVAL;
8827
8828         rec_size = attr->line_info_rec_size;
8829         if (rec_size < MIN_BPF_LINEINFO_SIZE ||
8830             rec_size > MAX_LINEINFO_REC_SIZE ||
8831             rec_size & (sizeof(u32) - 1))
8832                 return -EINVAL;
8833
8834         /* Need to zero it in case the userspace may
8835          * pass in a smaller bpf_line_info object.
8836          */
8837         linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
8838                          GFP_KERNEL | __GFP_NOWARN);
8839         if (!linfo)
8840                 return -ENOMEM;
8841
8842         prog = env->prog;
8843         btf = prog->aux->btf;
8844
8845         s = 0;
8846         sub = env->subprog_info;
8847         ulinfo = u64_to_user_ptr(attr->line_info);
8848         expected_size = sizeof(struct bpf_line_info);
8849         ncopy = min_t(u32, expected_size, rec_size);
8850         for (i = 0; i < nr_linfo; i++) {
8851                 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
8852                 if (err) {
8853                         if (err == -E2BIG) {
8854                                 verbose(env, "nonzero tailing record in line_info");
8855                                 if (put_user(expected_size,
8856                                              &uattr->line_info_rec_size))
8857                                         err = -EFAULT;
8858                         }
8859                         goto err_free;
8860                 }
8861
8862                 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
8863                         err = -EFAULT;
8864                         goto err_free;
8865                 }
8866
8867                 /*
8868                  * Check insn_off to ensure
8869                  * 1) strictly increasing AND
8870                  * 2) bounded by prog->len
8871                  *
8872                  * The linfo[0].insn_off == 0 check logically falls into
8873                  * the later "missing bpf_line_info for func..." case
8874                  * because the first linfo[0].insn_off must be the
8875                  * first sub also and the first sub must have
8876                  * subprog_info[0].start == 0.
8877                  */
8878                 if ((i && linfo[i].insn_off <= prev_offset) ||
8879                     linfo[i].insn_off >= prog->len) {
8880                         verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
8881                                 i, linfo[i].insn_off, prev_offset,
8882                                 prog->len);
8883                         err = -EINVAL;
8884                         goto err_free;
8885                 }
8886
8887                 if (!prog->insnsi[linfo[i].insn_off].code) {
8888                         verbose(env,
8889                                 "Invalid insn code at line_info[%u].insn_off\n",
8890                                 i);
8891                         err = -EINVAL;
8892                         goto err_free;
8893                 }
8894
8895                 if (!btf_name_by_offset(btf, linfo[i].line_off) ||
8896                     !btf_name_by_offset(btf, linfo[i].file_name_off)) {
8897                         verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
8898                         err = -EINVAL;
8899                         goto err_free;
8900                 }
8901
8902                 if (s != env->subprog_cnt) {
8903                         if (linfo[i].insn_off == sub[s].start) {
8904                                 sub[s].linfo_idx = i;
8905                                 s++;
8906                         } else if (sub[s].start < linfo[i].insn_off) {
8907                                 verbose(env, "missing bpf_line_info for func#%u\n", s);
8908                                 err = -EINVAL;
8909                                 goto err_free;
8910                         }
8911                 }
8912
8913                 prev_offset = linfo[i].insn_off;
8914                 ulinfo += rec_size;
8915         }
8916
8917         if (s != env->subprog_cnt) {
8918                 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
8919                         env->subprog_cnt - s, s);
8920                 err = -EINVAL;
8921                 goto err_free;
8922         }
8923
8924         prog->aux->linfo = linfo;
8925         prog->aux->nr_linfo = nr_linfo;
8926
8927         return 0;
8928
8929 err_free:
8930         kvfree(linfo);
8931         return err;
8932 }
8933
8934 static int check_btf_info(struct bpf_verifier_env *env,
8935                           const union bpf_attr *attr,
8936                           union bpf_attr __user *uattr)
8937 {
8938         struct btf *btf;
8939         int err;
8940
8941         if (!attr->func_info_cnt && !attr->line_info_cnt) {
8942                 if (check_abnormal_return(env))
8943                         return -EINVAL;
8944                 return 0;
8945         }
8946
8947         btf = btf_get_by_fd(attr->prog_btf_fd);
8948         if (IS_ERR(btf))
8949                 return PTR_ERR(btf);
8950         env->prog->aux->btf = btf;
8951
8952         err = check_btf_func(env, attr, uattr);
8953         if (err)
8954                 return err;
8955
8956         err = check_btf_line(env, attr, uattr);
8957         if (err)
8958                 return err;
8959
8960         return 0;
8961 }
8962
8963 /* check %cur's range satisfies %old's */
8964 static bool range_within(struct bpf_reg_state *old,
8965                          struct bpf_reg_state *cur)
8966 {
8967         return old->umin_value <= cur->umin_value &&
8968                old->umax_value >= cur->umax_value &&
8969                old->smin_value <= cur->smin_value &&
8970                old->smax_value >= cur->smax_value &&
8971                old->u32_min_value <= cur->u32_min_value &&
8972                old->u32_max_value >= cur->u32_max_value &&
8973                old->s32_min_value <= cur->s32_min_value &&
8974                old->s32_max_value >= cur->s32_max_value;
8975 }
8976
8977 /* If in the old state two registers had the same id, then they need to have
8978  * the same id in the new state as well.  But that id could be different from
8979  * the old state, so we need to track the mapping from old to new ids.
8980  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
8981  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
8982  * regs with a different old id could still have new id 9, we don't care about
8983  * that.
8984  * So we look through our idmap to see if this old id has been seen before.  If
8985  * so, we require the new id to match; otherwise, we add the id pair to the map.
8986  */
8987 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
8988 {
8989         unsigned int i;
8990
8991         for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
8992                 if (!idmap[i].old) {
8993                         /* Reached an empty slot; haven't seen this id before */
8994                         idmap[i].old = old_id;
8995                         idmap[i].cur = cur_id;
8996                         return true;
8997                 }
8998                 if (idmap[i].old == old_id)
8999                         return idmap[i].cur == cur_id;
9000         }
9001         /* We ran out of idmap slots, which should be impossible */
9002         WARN_ON_ONCE(1);
9003         return false;
9004 }
9005
9006 static void clean_func_state(struct bpf_verifier_env *env,
9007                              struct bpf_func_state *st)
9008 {
9009         enum bpf_reg_liveness live;
9010         int i, j;
9011
9012         for (i = 0; i < BPF_REG_FP; i++) {
9013                 live = st->regs[i].live;
9014                 /* liveness must not touch this register anymore */
9015                 st->regs[i].live |= REG_LIVE_DONE;
9016                 if (!(live & REG_LIVE_READ))
9017                         /* since the register is unused, clear its state
9018                          * to make further comparison simpler
9019                          */
9020                         __mark_reg_not_init(env, &st->regs[i]);
9021         }
9022
9023         for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
9024                 live = st->stack[i].spilled_ptr.live;
9025                 /* liveness must not touch this stack slot anymore */
9026                 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
9027                 if (!(live & REG_LIVE_READ)) {
9028                         __mark_reg_not_init(env, &st->stack[i].spilled_ptr);
9029                         for (j = 0; j < BPF_REG_SIZE; j++)
9030                                 st->stack[i].slot_type[j] = STACK_INVALID;
9031                 }
9032         }
9033 }
9034
9035 static void clean_verifier_state(struct bpf_verifier_env *env,
9036                                  struct bpf_verifier_state *st)
9037 {
9038         int i;
9039
9040         if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
9041                 /* all regs in this state in all frames were already marked */
9042                 return;
9043
9044         for (i = 0; i <= st->curframe; i++)
9045                 clean_func_state(env, st->frame[i]);
9046 }
9047
9048 /* the parentage chains form a tree.
9049  * the verifier states are added to state lists at given insn and
9050  * pushed into state stack for future exploration.
9051  * when the verifier reaches bpf_exit insn some of the verifer states
9052  * stored in the state lists have their final liveness state already,
9053  * but a lot of states will get revised from liveness point of view when
9054  * the verifier explores other branches.
9055  * Example:
9056  * 1: r0 = 1
9057  * 2: if r1 == 100 goto pc+1
9058  * 3: r0 = 2
9059  * 4: exit
9060  * when the verifier reaches exit insn the register r0 in the state list of
9061  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
9062  * of insn 2 and goes exploring further. At the insn 4 it will walk the
9063  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
9064  *
9065  * Since the verifier pushes the branch states as it sees them while exploring
9066  * the program the condition of walking the branch instruction for the second
9067  * time means that all states below this branch were already explored and
9068  * their final liveness markes are already propagated.
9069  * Hence when the verifier completes the search of state list in is_state_visited()
9070  * we can call this clean_live_states() function to mark all liveness states
9071  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
9072  * will not be used.
9073  * This function also clears the registers and stack for states that !READ
9074  * to simplify state merging.
9075  *
9076  * Important note here that walking the same branch instruction in the callee
9077  * doesn't meant that the states are DONE. The verifier has to compare
9078  * the callsites
9079  */
9080 static void clean_live_states(struct bpf_verifier_env *env, int insn,
9081                               struct bpf_verifier_state *cur)
9082 {
9083         struct bpf_verifier_state_list *sl;
9084         int i;
9085
9086         sl = *explored_state(env, insn);
9087         while (sl) {
9088                 if (sl->state.branches)
9089                         goto next;
9090                 if (sl->state.insn_idx != insn ||
9091                     sl->state.curframe != cur->curframe)
9092                         goto next;
9093                 for (i = 0; i <= cur->curframe; i++)
9094                         if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
9095                                 goto next;
9096                 clean_verifier_state(env, &sl->state);
9097 next:
9098                 sl = sl->next;
9099         }
9100 }
9101
9102 /* Returns true if (rold safe implies rcur safe) */
9103 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
9104                     struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
9105 {
9106         bool equal;
9107
9108         if (!(rold->live & REG_LIVE_READ))
9109                 /* explored state didn't use this */
9110                 return true;
9111
9112         equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
9113
9114         if (rold->type == PTR_TO_STACK)
9115                 /* two stack pointers are equal only if they're pointing to
9116                  * the same stack frame, since fp-8 in foo != fp-8 in bar
9117                  */
9118                 return equal && rold->frameno == rcur->frameno;
9119
9120         if (equal)
9121                 return true;
9122
9123         if (rold->type == NOT_INIT)
9124                 /* explored state can't have used this */
9125                 return true;
9126         if (rcur->type == NOT_INIT)
9127                 return false;
9128         switch (rold->type) {
9129         case SCALAR_VALUE:
9130                 if (env->explore_alu_limits)
9131                         return false;
9132                 if (rcur->type == SCALAR_VALUE) {
9133                         if (!rold->precise && !rcur->precise)
9134                                 return true;
9135                         /* new val must satisfy old val knowledge */
9136                         return range_within(rold, rcur) &&
9137                                tnum_in(rold->var_off, rcur->var_off);
9138                 } else {
9139                         /* We're trying to use a pointer in place of a scalar.
9140                          * Even if the scalar was unbounded, this could lead to
9141                          * pointer leaks because scalars are allowed to leak
9142                          * while pointers are not. We could make this safe in
9143                          * special cases if root is calling us, but it's
9144                          * probably not worth the hassle.
9145                          */
9146                         return false;
9147                 }
9148         case PTR_TO_MAP_VALUE:
9149                 /* If the new min/max/var_off satisfy the old ones and
9150                  * everything else matches, we are OK.
9151                  * 'id' is not compared, since it's only used for maps with
9152                  * bpf_spin_lock inside map element and in such cases if
9153                  * the rest of the prog is valid for one map element then
9154                  * it's valid for all map elements regardless of the key
9155                  * used in bpf_map_lookup()
9156                  */
9157                 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
9158                        range_within(rold, rcur) &&
9159                        tnum_in(rold->var_off, rcur->var_off);
9160         case PTR_TO_MAP_VALUE_OR_NULL:
9161                 /* a PTR_TO_MAP_VALUE could be safe to use as a
9162                  * PTR_TO_MAP_VALUE_OR_NULL into the same map.
9163                  * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
9164                  * checked, doing so could have affected others with the same
9165                  * id, and we can't check for that because we lost the id when
9166                  * we converted to a PTR_TO_MAP_VALUE.
9167                  */
9168                 if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
9169                         return false;
9170                 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
9171                         return false;
9172                 /* Check our ids match any regs they're supposed to */
9173                 return check_ids(rold->id, rcur->id, idmap);
9174         case PTR_TO_PACKET_META:
9175         case PTR_TO_PACKET:
9176                 if (rcur->type != rold->type)
9177                         return false;
9178                 /* We must have at least as much range as the old ptr
9179                  * did, so that any accesses which were safe before are
9180                  * still safe.  This is true even if old range < old off,
9181                  * since someone could have accessed through (ptr - k), or
9182                  * even done ptr -= k in a register, to get a safe access.
9183                  */
9184                 if (rold->range > rcur->range)
9185                         return false;
9186                 /* If the offsets don't match, we can't trust our alignment;
9187                  * nor can we be sure that we won't fall out of range.
9188                  */
9189                 if (rold->off != rcur->off)
9190                         return false;
9191                 /* id relations must be preserved */
9192                 if (rold->id && !check_ids(rold->id, rcur->id, idmap))
9193                         return false;
9194                 /* new val must satisfy old val knowledge */
9195                 return range_within(rold, rcur) &&
9196                        tnum_in(rold->var_off, rcur->var_off);
9197         case PTR_TO_CTX:
9198         case CONST_PTR_TO_MAP:
9199         case PTR_TO_PACKET_END:
9200         case PTR_TO_FLOW_KEYS:
9201         case PTR_TO_SOCKET:
9202         case PTR_TO_SOCKET_OR_NULL:
9203         case PTR_TO_SOCK_COMMON:
9204         case PTR_TO_SOCK_COMMON_OR_NULL:
9205         case PTR_TO_TCP_SOCK:
9206         case PTR_TO_TCP_SOCK_OR_NULL:
9207         case PTR_TO_XDP_SOCK:
9208                 /* Only valid matches are exact, which memcmp() above
9209                  * would have accepted
9210                  */
9211         default:
9212                 /* Don't know what's going on, just say it's not safe */
9213                 return false;
9214         }
9215
9216         /* Shouldn't get here; if we do, say it's not safe */
9217         WARN_ON_ONCE(1);
9218         return false;
9219 }
9220
9221 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
9222                       struct bpf_func_state *cur, struct bpf_id_pair *idmap)
9223 {
9224         int i, spi;
9225
9226         /* walk slots of the explored stack and ignore any additional
9227          * slots in the current stack, since explored(safe) state
9228          * didn't use them
9229          */
9230         for (i = 0; i < old->allocated_stack; i++) {
9231                 spi = i / BPF_REG_SIZE;
9232
9233                 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
9234                         i += BPF_REG_SIZE - 1;
9235                         /* explored state didn't use this */
9236                         continue;
9237                 }
9238
9239                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
9240                         continue;
9241
9242                 /* explored stack has more populated slots than current stack
9243                  * and these slots were used
9244                  */
9245                 if (i >= cur->allocated_stack)
9246                         return false;
9247
9248                 /* if old state was safe with misc data in the stack
9249                  * it will be safe with zero-initialized stack.
9250                  * The opposite is not true
9251                  */
9252                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
9253                     cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
9254                         continue;
9255                 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
9256                     cur->stack[spi].slot_type[i % BPF_REG_SIZE])
9257                         /* Ex: old explored (safe) state has STACK_SPILL in
9258                          * this stack slot, but current has STACK_MISC ->
9259                          * this verifier states are not equivalent,
9260                          * return false to continue verification of this path
9261                          */
9262                         return false;
9263                 if (i % BPF_REG_SIZE)
9264                         continue;
9265                 if (old->stack[spi].slot_type[0] != STACK_SPILL)
9266                         continue;
9267                 if (!regsafe(env, &old->stack[spi].spilled_ptr,
9268                              &cur->stack[spi].spilled_ptr, idmap))
9269                         /* when explored and current stack slot are both storing
9270                          * spilled registers, check that stored pointers types
9271                          * are the same as well.
9272                          * Ex: explored safe path could have stored
9273                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
9274                          * but current path has stored:
9275                          * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
9276                          * such verifier states are not equivalent.
9277                          * return false to continue verification of this path
9278                          */
9279                         return false;
9280         }
9281         return true;
9282 }
9283
9284 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
9285 {
9286         if (old->acquired_refs != cur->acquired_refs)
9287                 return false;
9288         return !memcmp(old->refs, cur->refs,
9289                        sizeof(*old->refs) * old->acquired_refs);
9290 }
9291
9292 /* compare two verifier states
9293  *
9294  * all states stored in state_list are known to be valid, since
9295  * verifier reached 'bpf_exit' instruction through them
9296  *
9297  * this function is called when verifier exploring different branches of
9298  * execution popped from the state stack. If it sees an old state that has
9299  * more strict register state and more strict stack state then this execution
9300  * branch doesn't need to be explored further, since verifier already
9301  * concluded that more strict state leads to valid finish.
9302  *
9303  * Therefore two states are equivalent if register state is more conservative
9304  * and explored stack state is more conservative than the current one.
9305  * Example:
9306  *       explored                   current
9307  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
9308  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
9309  *
9310  * In other words if current stack state (one being explored) has more
9311  * valid slots than old one that already passed validation, it means
9312  * the verifier can stop exploring and conclude that current state is valid too
9313  *
9314  * Similarly with registers. If explored state has register type as invalid
9315  * whereas register type in current state is meaningful, it means that
9316  * the current state will reach 'bpf_exit' instruction safely
9317  */
9318 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
9319                               struct bpf_func_state *cur)
9320 {
9321         int i;
9322
9323         memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
9324         for (i = 0; i < MAX_BPF_REG; i++)
9325                 if (!regsafe(env, &old->regs[i], &cur->regs[i],
9326                              env->idmap_scratch))
9327                         return false;
9328
9329         if (!stacksafe(env, old, cur, env->idmap_scratch))
9330                 return false;
9331
9332         if (!refsafe(old, cur))
9333                 return false;
9334
9335         return true;
9336 }
9337
9338 static bool states_equal(struct bpf_verifier_env *env,
9339                          struct bpf_verifier_state *old,
9340                          struct bpf_verifier_state *cur)
9341 {
9342         int i;
9343
9344         if (old->curframe != cur->curframe)
9345                 return false;
9346
9347         /* Verification state from speculative execution simulation
9348          * must never prune a non-speculative execution one.
9349          */
9350         if (old->speculative && !cur->speculative)
9351                 return false;
9352
9353         if (old->active_spin_lock != cur->active_spin_lock)
9354                 return false;
9355
9356         /* for states to be equal callsites have to be the same
9357          * and all frame states need to be equivalent
9358          */
9359         for (i = 0; i <= old->curframe; i++) {
9360                 if (old->frame[i]->callsite != cur->frame[i]->callsite)
9361                         return false;
9362                 if (!func_states_equal(env, old->frame[i], cur->frame[i]))
9363                         return false;
9364         }
9365         return true;
9366 }
9367
9368 /* Return 0 if no propagation happened. Return negative error code if error
9369  * happened. Otherwise, return the propagated bit.
9370  */
9371 static int propagate_liveness_reg(struct bpf_verifier_env *env,
9372                                   struct bpf_reg_state *reg,
9373                                   struct bpf_reg_state *parent_reg)
9374 {
9375         u8 parent_flag = parent_reg->live & REG_LIVE_READ;
9376         u8 flag = reg->live & REG_LIVE_READ;
9377         int err;
9378
9379         /* When comes here, read flags of PARENT_REG or REG could be any of
9380          * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
9381          * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
9382          */
9383         if (parent_flag == REG_LIVE_READ64 ||
9384             /* Or if there is no read flag from REG. */
9385             !flag ||
9386             /* Or if the read flag from REG is the same as PARENT_REG. */
9387             parent_flag == flag)
9388                 return 0;
9389
9390         err = mark_reg_read(env, reg, parent_reg, flag);
9391         if (err)
9392                 return err;
9393
9394         return flag;
9395 }
9396
9397 /* A write screens off any subsequent reads; but write marks come from the
9398  * straight-line code between a state and its parent.  When we arrive at an
9399  * equivalent state (jump target or such) we didn't arrive by the straight-line
9400  * code, so read marks in the state must propagate to the parent regardless
9401  * of the state's write marks. That's what 'parent == state->parent' comparison
9402  * in mark_reg_read() is for.
9403  */
9404 static int propagate_liveness(struct bpf_verifier_env *env,
9405                               const struct bpf_verifier_state *vstate,
9406                               struct bpf_verifier_state *vparent)
9407 {
9408         struct bpf_reg_state *state_reg, *parent_reg;
9409         struct bpf_func_state *state, *parent;
9410         int i, frame, err = 0;
9411
9412         if (vparent->curframe != vstate->curframe) {
9413                 WARN(1, "propagate_live: parent frame %d current frame %d\n",
9414                      vparent->curframe, vstate->curframe);
9415                 return -EFAULT;
9416         }
9417         /* Propagate read liveness of registers... */
9418         BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
9419         for (frame = 0; frame <= vstate->curframe; frame++) {
9420                 parent = vparent->frame[frame];
9421                 state = vstate->frame[frame];
9422                 parent_reg = parent->regs;
9423                 state_reg = state->regs;
9424                 /* We don't need to worry about FP liveness, it's read-only */
9425                 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9426                         err = propagate_liveness_reg(env, &state_reg[i],
9427                                                      &parent_reg[i]);
9428                         if (err < 0)
9429                                 return err;
9430                         if (err == REG_LIVE_READ64)
9431                                 mark_insn_zext(env, &parent_reg[i]);
9432                 }
9433
9434                 /* Propagate stack slots. */
9435                 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
9436                             i < parent->allocated_stack / BPF_REG_SIZE; i++) {
9437                         parent_reg = &parent->stack[i].spilled_ptr;
9438                         state_reg = &state->stack[i].spilled_ptr;
9439                         err = propagate_liveness_reg(env, state_reg,
9440                                                      parent_reg);
9441                         if (err < 0)
9442                                 return err;
9443                 }
9444         }
9445         return 0;
9446 }
9447
9448 /* find precise scalars in the previous equivalent state and
9449  * propagate them into the current state
9450  */
9451 static int propagate_precision(struct bpf_verifier_env *env,
9452                                const struct bpf_verifier_state *old)
9453 {
9454         struct bpf_reg_state *state_reg;
9455         struct bpf_func_state *state;
9456         int i, err = 0;
9457
9458         state = old->frame[old->curframe];
9459         state_reg = state->regs;
9460         for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
9461                 if (state_reg->type != SCALAR_VALUE ||
9462                     !state_reg->precise)
9463                         continue;
9464                 if (env->log.level & BPF_LOG_LEVEL2)
9465                         verbose(env, "propagating r%d\n", i);
9466                 err = mark_chain_precision(env, i);
9467                 if (err < 0)
9468                         return err;
9469         }
9470
9471         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
9472                 if (state->stack[i].slot_type[0] != STACK_SPILL)
9473                         continue;
9474                 state_reg = &state->stack[i].spilled_ptr;
9475                 if (state_reg->type != SCALAR_VALUE ||
9476                     !state_reg->precise)
9477                         continue;
9478                 if (env->log.level & BPF_LOG_LEVEL2)
9479                         verbose(env, "propagating fp%d\n",
9480                                 (-i - 1) * BPF_REG_SIZE);
9481                 err = mark_chain_precision_stack(env, i);
9482                 if (err < 0)
9483                         return err;
9484         }
9485         return 0;
9486 }
9487
9488 static bool states_maybe_looping(struct bpf_verifier_state *old,
9489                                  struct bpf_verifier_state *cur)
9490 {
9491         struct bpf_func_state *fold, *fcur;
9492         int i, fr = cur->curframe;
9493
9494         if (old->curframe != fr)
9495                 return false;
9496
9497         fold = old->frame[fr];
9498         fcur = cur->frame[fr];
9499         for (i = 0; i < MAX_BPF_REG; i++)
9500                 if (memcmp(&fold->regs[i], &fcur->regs[i],
9501                            offsetof(struct bpf_reg_state, parent)))
9502                         return false;
9503         return true;
9504 }
9505
9506
9507 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
9508 {
9509         struct bpf_verifier_state_list *new_sl;
9510         struct bpf_verifier_state_list *sl, **pprev;
9511         struct bpf_verifier_state *cur = env->cur_state, *new;
9512         int i, j, err, states_cnt = 0;
9513         bool add_new_state = env->test_state_freq ? true : false;
9514
9515         cur->last_insn_idx = env->prev_insn_idx;
9516         if (!env->insn_aux_data[insn_idx].prune_point)
9517                 /* this 'insn_idx' instruction wasn't marked, so we will not
9518                  * be doing state search here
9519                  */
9520                 return 0;
9521
9522         /* bpf progs typically have pruning point every 4 instructions
9523          * http://vger.kernel.org/bpfconf2019.html#session-1
9524          * Do not add new state for future pruning if the verifier hasn't seen
9525          * at least 2 jumps and at least 8 instructions.
9526          * This heuristics helps decrease 'total_states' and 'peak_states' metric.
9527          * In tests that amounts to up to 50% reduction into total verifier
9528          * memory consumption and 20% verifier time speedup.
9529          */
9530         if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
9531             env->insn_processed - env->prev_insn_processed >= 8)
9532                 add_new_state = true;
9533
9534         pprev = explored_state(env, insn_idx);
9535         sl = *pprev;
9536
9537         clean_live_states(env, insn_idx, cur);
9538
9539         while (sl) {
9540                 states_cnt++;
9541                 if (sl->state.insn_idx != insn_idx)
9542                         goto next;
9543                 if (sl->state.branches) {
9544                         if (states_maybe_looping(&sl->state, cur) &&
9545                             states_equal(env, &sl->state, cur)) {
9546                                 verbose_linfo(env, insn_idx, "; ");
9547                                 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
9548                                 return -EINVAL;
9549                         }
9550                         /* if the verifier is processing a loop, avoid adding new state
9551                          * too often, since different loop iterations have distinct
9552                          * states and may not help future pruning.
9553                          * This threshold shouldn't be too low to make sure that
9554                          * a loop with large bound will be rejected quickly.
9555                          * The most abusive loop will be:
9556                          * r1 += 1
9557                          * if r1 < 1000000 goto pc-2
9558                          * 1M insn_procssed limit / 100 == 10k peak states.
9559                          * This threshold shouldn't be too high either, since states
9560                          * at the end of the loop are likely to be useful in pruning.
9561                          */
9562                         if (env->jmps_processed - env->prev_jmps_processed < 20 &&
9563                             env->insn_processed - env->prev_insn_processed < 100)
9564                                 add_new_state = false;
9565                         goto miss;
9566                 }
9567                 if (states_equal(env, &sl->state, cur)) {
9568                         sl->hit_cnt++;
9569                         /* reached equivalent register/stack state,
9570                          * prune the search.
9571                          * Registers read by the continuation are read by us.
9572                          * If we have any write marks in env->cur_state, they
9573                          * will prevent corresponding reads in the continuation
9574                          * from reaching our parent (an explored_state).  Our
9575                          * own state will get the read marks recorded, but
9576                          * they'll be immediately forgotten as we're pruning
9577                          * this state and will pop a new one.
9578                          */
9579                         err = propagate_liveness(env, &sl->state, cur);
9580
9581                         /* if previous state reached the exit with precision and
9582                          * current state is equivalent to it (except precsion marks)
9583                          * the precision needs to be propagated back in
9584                          * the current state.
9585                          */
9586                         err = err ? : push_jmp_history(env, cur);
9587                         err = err ? : propagate_precision(env, &sl->state);
9588                         if (err)
9589                                 return err;
9590                         return 1;
9591                 }
9592 miss:
9593                 /* when new state is not going to be added do not increase miss count.
9594                  * Otherwise several loop iterations will remove the state
9595                  * recorded earlier. The goal of these heuristics is to have
9596                  * states from some iterations of the loop (some in the beginning
9597                  * and some at the end) to help pruning.
9598                  */
9599                 if (add_new_state)
9600                         sl->miss_cnt++;
9601                 /* heuristic to determine whether this state is beneficial
9602                  * to keep checking from state equivalence point of view.
9603                  * Higher numbers increase max_states_per_insn and verification time,
9604                  * but do not meaningfully decrease insn_processed.
9605                  */
9606                 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
9607                         /* the state is unlikely to be useful. Remove it to
9608                          * speed up verification
9609                          */
9610                         *pprev = sl->next;
9611                         if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
9612                                 u32 br = sl->state.branches;
9613
9614                                 WARN_ONCE(br,
9615                                           "BUG live_done but branches_to_explore %d\n",
9616                                           br);
9617                                 free_verifier_state(&sl->state, false);
9618                                 kfree(sl);
9619                                 env->peak_states--;
9620                         } else {
9621                                 /* cannot free this state, since parentage chain may
9622                                  * walk it later. Add it for free_list instead to
9623                                  * be freed at the end of verification
9624                                  */
9625                                 sl->next = env->free_list;
9626                                 env->free_list = sl;
9627                         }
9628                         sl = *pprev;
9629                         continue;
9630                 }
9631 next:
9632                 pprev = &sl->next;
9633                 sl = *pprev;
9634         }
9635
9636         if (env->max_states_per_insn < states_cnt)
9637                 env->max_states_per_insn = states_cnt;
9638
9639         if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
9640                 return push_jmp_history(env, cur);
9641
9642         if (!add_new_state)
9643                 return push_jmp_history(env, cur);
9644
9645         /* There were no equivalent states, remember the current one.
9646          * Technically the current state is not proven to be safe yet,
9647          * but it will either reach outer most bpf_exit (which means it's safe)
9648          * or it will be rejected. When there are no loops the verifier won't be
9649          * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
9650          * again on the way to bpf_exit.
9651          * When looping the sl->state.branches will be > 0 and this state
9652          * will not be considered for equivalence until branches == 0.
9653          */
9654         new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
9655         if (!new_sl)
9656                 return -ENOMEM;
9657         env->total_states++;
9658         env->peak_states++;
9659         env->prev_jmps_processed = env->jmps_processed;
9660         env->prev_insn_processed = env->insn_processed;
9661
9662         /* add new state to the head of linked list */
9663         new = &new_sl->state;
9664         err = copy_verifier_state(new, cur);
9665         if (err) {
9666                 free_verifier_state(new, false);
9667                 kfree(new_sl);
9668                 return err;
9669         }
9670         new->insn_idx = insn_idx;
9671         WARN_ONCE(new->branches != 1,
9672                   "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
9673
9674         cur->parent = new;
9675         cur->first_insn_idx = insn_idx;
9676         clear_jmp_history(cur);
9677         new_sl->next = *explored_state(env, insn_idx);
9678         *explored_state(env, insn_idx) = new_sl;
9679         /* connect new state to parentage chain. Current frame needs all
9680          * registers connected. Only r6 - r9 of the callers are alive (pushed
9681          * to the stack implicitly by JITs) so in callers' frames connect just
9682          * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
9683          * the state of the call instruction (with WRITTEN set), and r0 comes
9684          * from callee with its full parentage chain, anyway.
9685          */
9686         /* clear write marks in current state: the writes we did are not writes
9687          * our child did, so they don't screen off its reads from us.
9688          * (There are no read marks in current state, because reads always mark
9689          * their parent and current state never has children yet.  Only
9690          * explored_states can get read marks.)
9691          */
9692         for (j = 0; j <= cur->curframe; j++) {
9693                 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
9694                         cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
9695                 for (i = 0; i < BPF_REG_FP; i++)
9696                         cur->frame[j]->regs[i].live = REG_LIVE_NONE;
9697         }
9698
9699         /* all stack frames are accessible from callee, clear them all */
9700         for (j = 0; j <= cur->curframe; j++) {
9701                 struct bpf_func_state *frame = cur->frame[j];
9702                 struct bpf_func_state *newframe = new->frame[j];
9703
9704                 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
9705                         frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
9706                         frame->stack[i].spilled_ptr.parent =
9707                                                 &newframe->stack[i].spilled_ptr;
9708                 }
9709         }
9710         return 0;
9711 }
9712
9713 /* Return true if it's OK to have the same insn return a different type. */
9714 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
9715 {
9716         switch (type) {
9717         case PTR_TO_CTX:
9718         case PTR_TO_SOCKET:
9719         case PTR_TO_SOCKET_OR_NULL:
9720         case PTR_TO_SOCK_COMMON:
9721         case PTR_TO_SOCK_COMMON_OR_NULL:
9722         case PTR_TO_TCP_SOCK:
9723         case PTR_TO_TCP_SOCK_OR_NULL:
9724         case PTR_TO_XDP_SOCK:
9725         case PTR_TO_BTF_ID:
9726         case PTR_TO_BTF_ID_OR_NULL:
9727                 return false;
9728         default:
9729                 return true;
9730         }
9731 }
9732
9733 /* If an instruction was previously used with particular pointer types, then we
9734  * need to be careful to avoid cases such as the below, where it may be ok
9735  * for one branch accessing the pointer, but not ok for the other branch:
9736  *
9737  * R1 = sock_ptr
9738  * goto X;
9739  * ...
9740  * R1 = some_other_valid_ptr;
9741  * goto X;
9742  * ...
9743  * R2 = *(u32 *)(R1 + 0);
9744  */
9745 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
9746 {
9747         return src != prev && (!reg_type_mismatch_ok(src) ||
9748                                !reg_type_mismatch_ok(prev));
9749 }
9750
9751 static int do_check(struct bpf_verifier_env *env)
9752 {
9753         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
9754         struct bpf_verifier_state *state = env->cur_state;
9755         struct bpf_insn *insns = env->prog->insnsi;
9756         struct bpf_reg_state *regs;
9757         int insn_cnt = env->prog->len;
9758         bool do_print_state = false;
9759         int prev_insn_idx = -1;
9760
9761         for (;;) {
9762                 struct bpf_insn *insn;
9763                 u8 class;
9764                 int err;
9765
9766                 env->prev_insn_idx = prev_insn_idx;
9767                 if (env->insn_idx >= insn_cnt) {
9768                         verbose(env, "invalid insn idx %d insn_cnt %d\n",
9769                                 env->insn_idx, insn_cnt);
9770                         return -EFAULT;
9771                 }
9772
9773                 insn = &insns[env->insn_idx];
9774                 class = BPF_CLASS(insn->code);
9775
9776                 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
9777                         verbose(env,
9778                                 "BPF program is too large. Processed %d insn\n",
9779                                 env->insn_processed);
9780                         return -E2BIG;
9781                 }
9782
9783                 err = is_state_visited(env, env->insn_idx);
9784                 if (err < 0)
9785                         return err;
9786                 if (err == 1) {
9787                         /* found equivalent state, can prune the search */
9788                         if (env->log.level & BPF_LOG_LEVEL) {
9789                                 if (do_print_state)
9790                                         verbose(env, "\nfrom %d to %d%s: safe\n",
9791                                                 env->prev_insn_idx, env->insn_idx,
9792                                                 env->cur_state->speculative ?
9793                                                 " (speculative execution)" : "");
9794                                 else
9795                                         verbose(env, "%d: safe\n", env->insn_idx);
9796                         }
9797                         goto process_bpf_exit;
9798                 }
9799
9800                 if (signal_pending(current))
9801                         return -EAGAIN;
9802
9803                 if (need_resched())
9804                         cond_resched();
9805
9806                 if (env->log.level & BPF_LOG_LEVEL2 ||
9807                     (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
9808                         if (env->log.level & BPF_LOG_LEVEL2)
9809                                 verbose(env, "%d:", env->insn_idx);
9810                         else
9811                                 verbose(env, "\nfrom %d to %d%s:",
9812                                         env->prev_insn_idx, env->insn_idx,
9813                                         env->cur_state->speculative ?
9814                                         " (speculative execution)" : "");
9815                         print_verifier_state(env, state->frame[state->curframe]);
9816                         do_print_state = false;
9817                 }
9818
9819                 if (env->log.level & BPF_LOG_LEVEL) {
9820                         const struct bpf_insn_cbs cbs = {
9821                                 .cb_print       = verbose,
9822                                 .private_data   = env,
9823                         };
9824
9825                         verbose_linfo(env, env->insn_idx, "; ");
9826                         verbose(env, "%d: ", env->insn_idx);
9827                         print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
9828                 }
9829
9830                 if (bpf_prog_is_dev_bound(env->prog->aux)) {
9831                         err = bpf_prog_offload_verify_insn(env, env->insn_idx,
9832                                                            env->prev_insn_idx);
9833                         if (err)
9834                                 return err;
9835                 }
9836
9837                 regs = cur_regs(env);
9838                 sanitize_mark_insn_seen(env);
9839                 prev_insn_idx = env->insn_idx;
9840
9841                 if (class == BPF_ALU || class == BPF_ALU64) {
9842                         err = check_alu_op(env, insn);
9843                         if (err)
9844                                 return err;
9845
9846                 } else if (class == BPF_LDX) {
9847                         enum bpf_reg_type *prev_src_type, src_reg_type;
9848
9849                         /* check for reserved fields is already done */
9850
9851                         /* check src operand */
9852                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
9853                         if (err)
9854                                 return err;
9855
9856                         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
9857                         if (err)
9858                                 return err;
9859
9860                         src_reg_type = regs[insn->src_reg].type;
9861
9862                         /* check that memory (src_reg + off) is readable,
9863                          * the state of dst_reg will be updated by this func
9864                          */
9865                         err = check_mem_access(env, env->insn_idx, insn->src_reg,
9866                                                insn->off, BPF_SIZE(insn->code),
9867                                                BPF_READ, insn->dst_reg, false);
9868                         if (err)
9869                                 return err;
9870
9871                         prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9872
9873                         if (*prev_src_type == NOT_INIT) {
9874                                 /* saw a valid insn
9875                                  * dst_reg = *(u32 *)(src_reg + off)
9876                                  * save type to validate intersecting paths
9877                                  */
9878                                 *prev_src_type = src_reg_type;
9879
9880                         } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9881                                 /* ABuser program is trying to use the same insn
9882                                  * dst_reg = *(u32*) (src_reg + off)
9883                                  * with different pointer types:
9884                                  * src_reg == ctx in one branch and
9885                                  * src_reg == stack|map in some other branch.
9886                                  * Reject it.
9887                                  */
9888                                 verbose(env, "same insn cannot be used with different pointers\n");
9889                                 return -EINVAL;
9890                         }
9891
9892                 } else if (class == BPF_STX) {
9893                         enum bpf_reg_type *prev_dst_type, dst_reg_type;
9894
9895                         if (BPF_MODE(insn->code) == BPF_XADD) {
9896                                 err = check_xadd(env, env->insn_idx, insn);
9897                                 if (err)
9898                                         return err;
9899                                 env->insn_idx++;
9900                                 continue;
9901                         }
9902
9903                         /* check src1 operand */
9904                         err = check_reg_arg(env, insn->src_reg, SRC_OP);
9905                         if (err)
9906                                 return err;
9907                         /* check src2 operand */
9908                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9909                         if (err)
9910                                 return err;
9911
9912                         dst_reg_type = regs[insn->dst_reg].type;
9913
9914                         /* check that memory (dst_reg + off) is writeable */
9915                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
9916                                                insn->off, BPF_SIZE(insn->code),
9917                                                BPF_WRITE, insn->src_reg, false);
9918                         if (err)
9919                                 return err;
9920
9921                         prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9922
9923                         if (*prev_dst_type == NOT_INIT) {
9924                                 *prev_dst_type = dst_reg_type;
9925                         } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
9926                                 verbose(env, "same insn cannot be used with different pointers\n");
9927                                 return -EINVAL;
9928                         }
9929
9930                 } else if (class == BPF_ST) {
9931                         if (BPF_MODE(insn->code) != BPF_MEM ||
9932                             insn->src_reg != BPF_REG_0) {
9933                                 verbose(env, "BPF_ST uses reserved fields\n");
9934                                 return -EINVAL;
9935                         }
9936                         /* check src operand */
9937                         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9938                         if (err)
9939                                 return err;
9940
9941                         if (is_ctx_reg(env, insn->dst_reg)) {
9942                                 verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
9943                                         insn->dst_reg,
9944                                         reg_type_str[reg_state(env, insn->dst_reg)->type]);
9945                                 return -EACCES;
9946                         }
9947
9948                         /* check that memory (dst_reg + off) is writeable */
9949                         err = check_mem_access(env, env->insn_idx, insn->dst_reg,
9950                                                insn->off, BPF_SIZE(insn->code),
9951                                                BPF_WRITE, -1, false);
9952                         if (err)
9953                                 return err;
9954
9955                 } else if (class == BPF_JMP || class == BPF_JMP32) {
9956                         u8 opcode = BPF_OP(insn->code);
9957
9958                         env->jmps_processed++;
9959                         if (opcode == BPF_CALL) {
9960                                 if (BPF_SRC(insn->code) != BPF_K ||
9961                                     insn->off != 0 ||
9962                                     (insn->src_reg != BPF_REG_0 &&
9963                                      insn->src_reg != BPF_PSEUDO_CALL) ||
9964                                     insn->dst_reg != BPF_REG_0 ||
9965                                     class == BPF_JMP32) {
9966                                         verbose(env, "BPF_CALL uses reserved fields\n");
9967                                         return -EINVAL;
9968                                 }
9969
9970                                 if (env->cur_state->active_spin_lock &&
9971                                     (insn->src_reg == BPF_PSEUDO_CALL ||
9972                                      insn->imm != BPF_FUNC_spin_unlock)) {
9973                                         verbose(env, "function calls are not allowed while holding a lock\n");
9974                                         return -EINVAL;
9975                                 }
9976                                 if (insn->src_reg == BPF_PSEUDO_CALL)
9977                                         err = check_func_call(env, insn, &env->insn_idx);
9978                                 else
9979                                         err = check_helper_call(env, insn->imm, env->insn_idx);
9980                                 if (err)
9981                                         return err;
9982
9983                         } else if (opcode == BPF_JA) {
9984                                 if (BPF_SRC(insn->code) != BPF_K ||
9985                                     insn->imm != 0 ||
9986                                     insn->src_reg != BPF_REG_0 ||
9987                                     insn->dst_reg != BPF_REG_0 ||
9988                                     class == BPF_JMP32) {
9989                                         verbose(env, "BPF_JA uses reserved fields\n");
9990                                         return -EINVAL;
9991                                 }
9992
9993                                 env->insn_idx += insn->off + 1;
9994                                 continue;
9995
9996                         } else if (opcode == BPF_EXIT) {
9997                                 if (BPF_SRC(insn->code) != BPF_K ||
9998                                     insn->imm != 0 ||
9999                                     insn->src_reg != BPF_REG_0 ||
10000                                     insn->dst_reg != BPF_REG_0 ||
10001                                     class == BPF_JMP32) {
10002                                         verbose(env, "BPF_EXIT uses reserved fields\n");
10003                                         return -EINVAL;
10004                                 }
10005
10006                                 if (env->cur_state->active_spin_lock) {
10007                                         verbose(env, "bpf_spin_unlock is missing\n");
10008                                         return -EINVAL;
10009                                 }
10010
10011                                 if (state->curframe) {
10012                                         /* exit from nested function */
10013                                         err = prepare_func_exit(env, &env->insn_idx);
10014                                         if (err)
10015                                                 return err;
10016                                         do_print_state = true;
10017                                         continue;
10018                                 }
10019
10020                                 err = check_reference_leak(env);
10021                                 if (err)
10022                                         return err;
10023
10024                                 err = check_return_code(env);
10025                                 if (err)
10026                                         return err;
10027 process_bpf_exit:
10028                                 update_branch_counts(env, env->cur_state);
10029                                 err = pop_stack(env, &prev_insn_idx,
10030                                                 &env->insn_idx, pop_log);
10031                                 if (err < 0) {
10032                                         if (err != -ENOENT)
10033                                                 return err;
10034                                         break;
10035                                 } else {
10036                                         do_print_state = true;
10037                                         continue;
10038                                 }
10039                         } else {
10040                                 err = check_cond_jmp_op(env, insn, &env->insn_idx);
10041                                 if (err)
10042                                         return err;
10043                         }
10044                 } else if (class == BPF_LD) {
10045                         u8 mode = BPF_MODE(insn->code);
10046
10047                         if (mode == BPF_ABS || mode == BPF_IND) {
10048                                 err = check_ld_abs(env, insn);
10049                                 if (err)
10050                                         return err;
10051
10052                         } else if (mode == BPF_IMM) {
10053                                 err = check_ld_imm(env, insn);
10054                                 if (err)
10055                                         return err;
10056
10057                                 env->insn_idx++;
10058                                 sanitize_mark_insn_seen(env);
10059                         } else {
10060                                 verbose(env, "invalid BPF_LD mode\n");
10061                                 return -EINVAL;
10062                         }
10063                 } else {
10064                         verbose(env, "unknown insn class %d\n", class);
10065                         return -EINVAL;
10066                 }
10067
10068                 env->insn_idx++;
10069         }
10070
10071         return 0;
10072 }
10073
10074 /* replace pseudo btf_id with kernel symbol address */
10075 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
10076                                struct bpf_insn *insn,
10077                                struct bpf_insn_aux_data *aux)
10078 {
10079         const struct btf_var_secinfo *vsi;
10080         const struct btf_type *datasec;
10081         const struct btf_type *t;
10082         const char *sym_name;
10083         bool percpu = false;
10084         u32 type, id = insn->imm;
10085         s32 datasec_id;
10086         u64 addr;
10087         int i;
10088
10089         if (!btf_vmlinux) {
10090                 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
10091                 return -EINVAL;
10092         }
10093
10094         if (insn[1].imm != 0) {
10095                 verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
10096                 return -EINVAL;
10097         }
10098
10099         t = btf_type_by_id(btf_vmlinux, id);
10100         if (!t) {
10101                 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
10102                 return -ENOENT;
10103         }
10104
10105         if (!btf_type_is_var(t)) {
10106                 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
10107                         id);
10108                 return -EINVAL;
10109         }
10110
10111         sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
10112         addr = kallsyms_lookup_name(sym_name);
10113         if (!addr) {
10114                 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
10115                         sym_name);
10116                 return -ENOENT;
10117         }
10118
10119         datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
10120                                            BTF_KIND_DATASEC);
10121         if (datasec_id > 0) {
10122                 datasec = btf_type_by_id(btf_vmlinux, datasec_id);
10123                 for_each_vsi(i, datasec, vsi) {
10124                         if (vsi->type == id) {
10125                                 percpu = true;
10126                                 break;
10127                         }
10128                 }
10129         }
10130
10131         insn[0].imm = (u32)addr;
10132         insn[1].imm = addr >> 32;
10133
10134         type = t->type;
10135         t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
10136         if (percpu) {
10137                 aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
10138                 aux->btf_var.btf_id = type;
10139         } else if (!btf_type_is_struct(t)) {
10140                 const struct btf_type *ret;
10141                 const char *tname;
10142                 u32 tsize;
10143
10144                 /* resolve the type size of ksym. */
10145                 ret = btf_resolve_size(btf_vmlinux, t, &tsize);
10146                 if (IS_ERR(ret)) {
10147                         tname = btf_name_by_offset(btf_vmlinux, t->name_off);
10148                         verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
10149                                 tname, PTR_ERR(ret));
10150                         return -EINVAL;
10151                 }
10152                 aux->btf_var.reg_type = PTR_TO_MEM;
10153                 aux->btf_var.mem_size = tsize;
10154         } else {
10155                 aux->btf_var.reg_type = PTR_TO_BTF_ID;
10156                 aux->btf_var.btf_id = type;
10157         }
10158         return 0;
10159 }
10160
10161 static int check_map_prealloc(struct bpf_map *map)
10162 {
10163         return (map->map_type != BPF_MAP_TYPE_HASH &&
10164                 map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
10165                 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
10166                 !(map->map_flags & BPF_F_NO_PREALLOC);
10167 }
10168
10169 static bool is_tracing_prog_type(enum bpf_prog_type type)
10170 {
10171         switch (type) {
10172         case BPF_PROG_TYPE_KPROBE:
10173         case BPF_PROG_TYPE_TRACEPOINT:
10174         case BPF_PROG_TYPE_PERF_EVENT:
10175         case BPF_PROG_TYPE_RAW_TRACEPOINT:
10176                 return true;
10177         default:
10178                 return false;
10179         }
10180 }
10181
10182 static bool is_preallocated_map(struct bpf_map *map)
10183 {
10184         if (!check_map_prealloc(map))
10185                 return false;
10186         if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
10187                 return false;
10188         return true;
10189 }
10190
10191 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
10192                                         struct bpf_map *map,
10193                                         struct bpf_prog *prog)
10194
10195 {
10196         enum bpf_prog_type prog_type = resolve_prog_type(prog);
10197         /*
10198          * Validate that trace type programs use preallocated hash maps.
10199          *
10200          * For programs attached to PERF events this is mandatory as the
10201          * perf NMI can hit any arbitrary code sequence.
10202          *
10203          * All other trace types using preallocated hash maps are unsafe as
10204          * well because tracepoint or kprobes can be inside locked regions
10205          * of the memory allocator or at a place where a recursion into the
10206          * memory allocator would see inconsistent state.
10207          *
10208          * On RT enabled kernels run-time allocation of all trace type
10209          * programs is strictly prohibited due to lock type constraints. On
10210          * !RT kernels it is allowed for backwards compatibility reasons for
10211          * now, but warnings are emitted so developers are made aware of
10212          * the unsafety and can fix their programs before this is enforced.
10213          */
10214         if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
10215                 if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
10216                         verbose(env, "perf_event programs can only use preallocated hash map\n");
10217                         return -EINVAL;
10218                 }
10219                 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
10220                         verbose(env, "trace type programs can only use preallocated hash map\n");
10221                         return -EINVAL;
10222                 }
10223                 WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
10224                 verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
10225         }
10226
10227         if ((is_tracing_prog_type(prog_type) ||
10228              prog_type == BPF_PROG_TYPE_SOCKET_FILTER) &&
10229             map_value_has_spin_lock(map)) {
10230                 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
10231                 return -EINVAL;
10232         }
10233
10234         if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
10235             !bpf_offload_prog_map_match(prog, map)) {
10236                 verbose(env, "offload device mismatch between prog and map\n");
10237                 return -EINVAL;
10238         }
10239
10240         if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
10241                 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
10242                 return -EINVAL;
10243         }
10244
10245         if (prog->aux->sleepable)
10246                 switch (map->map_type) {
10247                 case BPF_MAP_TYPE_HASH:
10248                 case BPF_MAP_TYPE_LRU_HASH:
10249                 case BPF_MAP_TYPE_ARRAY:
10250                         if (!is_preallocated_map(map)) {
10251                                 verbose(env,
10252                                         "Sleepable programs can only use preallocated hash maps\n");
10253                                 return -EINVAL;
10254                         }
10255                         break;
10256                 default:
10257                         verbose(env,
10258                                 "Sleepable programs can only use array and hash maps\n");
10259                         return -EINVAL;
10260                 }
10261
10262         return 0;
10263 }
10264
10265 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
10266 {
10267         return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
10268                 map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
10269 }
10270
10271 /* find and rewrite pseudo imm in ld_imm64 instructions:
10272  *
10273  * 1. if it accesses map FD, replace it with actual map pointer.
10274  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
10275  *
10276  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
10277  */
10278 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
10279 {
10280         struct bpf_insn *insn = env->prog->insnsi;
10281         int insn_cnt = env->prog->len;
10282         int i, j, err;
10283
10284         err = bpf_prog_calc_tag(env->prog);
10285         if (err)
10286                 return err;
10287
10288         for (i = 0; i < insn_cnt; i++, insn++) {
10289                 if (BPF_CLASS(insn->code) == BPF_LDX &&
10290                     (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
10291                         verbose(env, "BPF_LDX uses reserved fields\n");
10292                         return -EINVAL;
10293                 }
10294
10295                 if (BPF_CLASS(insn->code) == BPF_STX &&
10296                     ((BPF_MODE(insn->code) != BPF_MEM &&
10297                       BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
10298                         verbose(env, "BPF_STX uses reserved fields\n");
10299                         return -EINVAL;
10300                 }
10301
10302                 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
10303                         struct bpf_insn_aux_data *aux;
10304                         struct bpf_map *map;
10305                         struct fd f;
10306                         u64 addr;
10307
10308                         if (i == insn_cnt - 1 || insn[1].code != 0 ||
10309                             insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
10310                             insn[1].off != 0) {
10311                                 verbose(env, "invalid bpf_ld_imm64 insn\n");
10312                                 return -EINVAL;
10313                         }
10314
10315                         if (insn[0].src_reg == 0)
10316                                 /* valid generic load 64-bit imm */
10317                                 goto next_insn;
10318
10319                         if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
10320                                 aux = &env->insn_aux_data[i];
10321                                 err = check_pseudo_btf_id(env, insn, aux);
10322                                 if (err)
10323                                         return err;
10324                                 goto next_insn;
10325                         }
10326
10327                         /* In final convert_pseudo_ld_imm64() step, this is
10328                          * converted into regular 64-bit imm load insn.
10329                          */
10330                         if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
10331                              insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
10332                             (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
10333                              insn[1].imm != 0)) {
10334                                 verbose(env,
10335                                         "unrecognized bpf_ld_imm64 insn\n");
10336                                 return -EINVAL;
10337                         }
10338
10339                         f = fdget(insn[0].imm);
10340                         map = __bpf_map_get(f);
10341                         if (IS_ERR(map)) {
10342                                 verbose(env, "fd %d is not pointing to valid bpf_map\n",
10343                                         insn[0].imm);
10344                                 return PTR_ERR(map);
10345                         }
10346
10347                         err = check_map_prog_compatibility(env, map, env->prog);
10348                         if (err) {
10349                                 fdput(f);
10350                                 return err;
10351                         }
10352
10353                         aux = &env->insn_aux_data[i];
10354                         if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
10355                                 addr = (unsigned long)map;
10356                         } else {
10357                                 u32 off = insn[1].imm;
10358
10359                                 if (off >= BPF_MAX_VAR_OFF) {
10360                                         verbose(env, "direct value offset of %u is not allowed\n", off);
10361                                         fdput(f);
10362                                         return -EINVAL;
10363                                 }
10364
10365                                 if (!map->ops->map_direct_value_addr) {
10366                                         verbose(env, "no direct value access support for this map type\n");
10367                                         fdput(f);
10368                                         return -EINVAL;
10369                                 }
10370
10371                                 err = map->ops->map_direct_value_addr(map, &addr, off);
10372                                 if (err) {
10373                                         verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
10374                                                 map->value_size, off);
10375                                         fdput(f);
10376                                         return err;
10377                                 }
10378
10379                                 aux->map_off = off;
10380                                 addr += off;
10381                         }
10382
10383                         insn[0].imm = (u32)addr;
10384                         insn[1].imm = addr >> 32;
10385
10386                         /* check whether we recorded this map already */
10387                         for (j = 0; j < env->used_map_cnt; j++) {
10388                                 if (env->used_maps[j] == map) {
10389                                         aux->map_index = j;
10390                                         fdput(f);
10391                                         goto next_insn;
10392                                 }
10393                         }
10394
10395                         if (env->used_map_cnt >= MAX_USED_MAPS) {
10396                                 fdput(f);
10397                                 return -E2BIG;
10398                         }
10399
10400                         /* hold the map. If the program is rejected by verifier,
10401                          * the map will be released by release_maps() or it
10402                          * will be used by the valid program until it's unloaded
10403                          * and all maps are released in free_used_maps()
10404                          */
10405                         bpf_map_inc(map);
10406
10407                         aux->map_index = env->used_map_cnt;
10408                         env->used_maps[env->used_map_cnt++] = map;
10409
10410                         if (bpf_map_is_cgroup_storage(map) &&
10411                             bpf_cgroup_storage_assign(env->prog->aux, map)) {
10412                                 verbose(env, "only one cgroup storage of each type is allowed\n");
10413                                 fdput(f);
10414                                 return -EBUSY;
10415                         }
10416
10417                         fdput(f);
10418 next_insn:
10419                         insn++;
10420                         i++;
10421                         continue;
10422                 }
10423
10424                 /* Basic sanity check before we invest more work here. */
10425                 if (!bpf_opcode_in_insntable(insn->code)) {
10426                         verbose(env, "unknown opcode %02x\n", insn->code);
10427                         return -EINVAL;
10428                 }
10429         }
10430
10431         /* now all pseudo BPF_LD_IMM64 instructions load valid
10432          * 'struct bpf_map *' into a register instead of user map_fd.
10433          * These pointers will be used later by verifier to validate map access.
10434          */
10435         return 0;
10436 }
10437
10438 /* drop refcnt of maps used by the rejected program */
10439 static void release_maps(struct bpf_verifier_env *env)
10440 {
10441         __bpf_free_used_maps(env->prog->aux, env->used_maps,
10442                              env->used_map_cnt);
10443 }
10444
10445 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
10446 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
10447 {
10448         struct bpf_insn *insn = env->prog->insnsi;
10449         int insn_cnt = env->prog->len;
10450         int i;
10451
10452         for (i = 0; i < insn_cnt; i++, insn++)
10453                 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
10454                         insn->src_reg = 0;
10455 }
10456
10457 /* single env->prog->insni[off] instruction was replaced with the range
10458  * insni[off, off + cnt).  Adjust corresponding insn_aux_data by copying
10459  * [0, off) and [off, end) to new locations, so the patched range stays zero
10460  */
10461 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
10462                                  struct bpf_insn_aux_data *new_data,
10463                                  struct bpf_prog *new_prog, u32 off, u32 cnt)
10464 {
10465         struct bpf_insn_aux_data *old_data = env->insn_aux_data;
10466         struct bpf_insn *insn = new_prog->insnsi;
10467         u32 old_seen = old_data[off].seen;
10468         u32 prog_len;
10469         int i;
10470
10471         /* aux info at OFF always needs adjustment, no matter fast path
10472          * (cnt == 1) is taken or not. There is no guarantee INSN at OFF is the
10473          * original insn at old prog.
10474          */
10475         old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
10476
10477         if (cnt == 1)
10478                 return;
10479         prog_len = new_prog->len;
10480
10481         memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
10482         memcpy(new_data + off + cnt - 1, old_data + off,
10483                sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
10484         for (i = off; i < off + cnt - 1; i++) {
10485                 /* Expand insni[off]'s seen count to the patched range. */
10486                 new_data[i].seen = old_seen;
10487                 new_data[i].zext_dst = insn_has_def32(env, insn + i);
10488         }
10489         env->insn_aux_data = new_data;
10490         vfree(old_data);
10491 }
10492
10493 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
10494 {
10495         int i;
10496
10497         if (len == 1)
10498                 return;
10499         /* NOTE: fake 'exit' subprog should be updated as well. */
10500         for (i = 0; i <= env->subprog_cnt; i++) {
10501                 if (env->subprog_info[i].start <= off)
10502                         continue;
10503                 env->subprog_info[i].start += len - 1;
10504         }
10505 }
10506
10507 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
10508 {
10509         struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
10510         int i, sz = prog->aux->size_poke_tab;
10511         struct bpf_jit_poke_descriptor *desc;
10512
10513         for (i = 0; i < sz; i++) {
10514                 desc = &tab[i];
10515                 if (desc->insn_idx <= off)
10516                         continue;
10517                 desc->insn_idx += len - 1;
10518         }
10519 }
10520
10521 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
10522                                             const struct bpf_insn *patch, u32 len)
10523 {
10524         struct bpf_prog *new_prog;
10525         struct bpf_insn_aux_data *new_data = NULL;
10526
10527         if (len > 1) {
10528                 new_data = vzalloc(array_size(env->prog->len + len - 1,
10529                                               sizeof(struct bpf_insn_aux_data)));
10530                 if (!new_data)
10531                         return NULL;
10532         }
10533
10534         new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
10535         if (IS_ERR(new_prog)) {
10536                 if (PTR_ERR(new_prog) == -ERANGE)
10537                         verbose(env,
10538                                 "insn %d cannot be patched due to 16-bit range\n",
10539                                 env->insn_aux_data[off].orig_idx);
10540                 vfree(new_data);
10541                 return NULL;
10542         }
10543         adjust_insn_aux_data(env, new_data, new_prog, off, len);
10544         adjust_subprog_starts(env, off, len);
10545         adjust_poke_descs(new_prog, off, len);
10546         return new_prog;
10547 }
10548
10549 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
10550                                               u32 off, u32 cnt)
10551 {
10552         int i, j;
10553
10554         /* find first prog starting at or after off (first to remove) */
10555         for (i = 0; i < env->subprog_cnt; i++)
10556                 if (env->subprog_info[i].start >= off)
10557                         break;
10558         /* find first prog starting at or after off + cnt (first to stay) */
10559         for (j = i; j < env->subprog_cnt; j++)
10560                 if (env->subprog_info[j].start >= off + cnt)
10561                         break;
10562         /* if j doesn't start exactly at off + cnt, we are just removing
10563          * the front of previous prog
10564          */
10565         if (env->subprog_info[j].start != off + cnt)
10566                 j--;
10567
10568         if (j > i) {
10569                 struct bpf_prog_aux *aux = env->prog->aux;
10570                 int move;
10571
10572                 /* move fake 'exit' subprog as well */
10573                 move = env->subprog_cnt + 1 - j;
10574
10575                 memmove(env->subprog_info + i,
10576                         env->subprog_info + j,
10577                         sizeof(*env->subprog_info) * move);
10578                 env->subprog_cnt -= j - i;
10579
10580                 /* remove func_info */
10581                 if (aux->func_info) {
10582                         move = aux->func_info_cnt - j;
10583
10584                         memmove(aux->func_info + i,
10585                                 aux->func_info + j,
10586                                 sizeof(*aux->func_info) * move);
10587                         aux->func_info_cnt -= j - i;
10588                         /* func_info->insn_off is set after all code rewrites,
10589                          * in adjust_btf_func() - no need to adjust
10590                          */
10591                 }
10592         } else {
10593                 /* convert i from "first prog to remove" to "first to adjust" */
10594                 if (env->subprog_info[i].start == off)
10595                         i++;
10596         }
10597
10598         /* update fake 'exit' subprog as well */
10599         for (; i <= env->subprog_cnt; i++)
10600                 env->subprog_info[i].start -= cnt;
10601
10602         return 0;
10603 }
10604
10605 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
10606                                       u32 cnt)
10607 {
10608         struct bpf_prog *prog = env->prog;
10609         u32 i, l_off, l_cnt, nr_linfo;
10610         struct bpf_line_info *linfo;
10611
10612         nr_linfo = prog->aux->nr_linfo;
10613         if (!nr_linfo)
10614                 return 0;
10615
10616         linfo = prog->aux->linfo;
10617
10618         /* find first line info to remove, count lines to be removed */
10619         for (i = 0; i < nr_linfo; i++)
10620                 if (linfo[i].insn_off >= off)
10621                         break;
10622
10623         l_off = i;
10624         l_cnt = 0;
10625         for (; i < nr_linfo; i++)
10626                 if (linfo[i].insn_off < off + cnt)
10627                         l_cnt++;
10628                 else
10629                         break;
10630
10631         /* First live insn doesn't match first live linfo, it needs to "inherit"
10632          * last removed linfo.  prog is already modified, so prog->len == off
10633          * means no live instructions after (tail of the program was removed).
10634          */
10635         if (prog->len != off && l_cnt &&
10636             (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
10637                 l_cnt--;
10638                 linfo[--i].insn_off = off + cnt;
10639         }
10640
10641         /* remove the line info which refer to the removed instructions */
10642         if (l_cnt) {
10643                 memmove(linfo + l_off, linfo + i,
10644                         sizeof(*linfo) * (nr_linfo - i));
10645
10646                 prog->aux->nr_linfo -= l_cnt;
10647                 nr_linfo = prog->aux->nr_linfo;
10648         }
10649
10650         /* pull all linfo[i].insn_off >= off + cnt in by cnt */
10651         for (i = l_off; i < nr_linfo; i++)
10652                 linfo[i].insn_off -= cnt;
10653
10654         /* fix up all subprogs (incl. 'exit') which start >= off */
10655         for (i = 0; i <= env->subprog_cnt; i++)
10656                 if (env->subprog_info[i].linfo_idx > l_off) {
10657                         /* program may have started in the removed region but
10658                          * may not be fully removed
10659                          */
10660                         if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
10661                                 env->subprog_info[i].linfo_idx -= l_cnt;
10662                         else
10663                                 env->subprog_info[i].linfo_idx = l_off;
10664                 }
10665
10666         return 0;
10667 }
10668
10669 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
10670 {
10671         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10672         unsigned int orig_prog_len = env->prog->len;
10673         int err;
10674
10675         if (bpf_prog_is_dev_bound(env->prog->aux))
10676                 bpf_prog_offload_remove_insns(env, off, cnt);
10677
10678         err = bpf_remove_insns(env->prog, off, cnt);
10679         if (err)
10680                 return err;
10681
10682         err = adjust_subprog_starts_after_remove(env, off, cnt);
10683         if (err)
10684                 return err;
10685
10686         err = bpf_adj_linfo_after_remove(env, off, cnt);
10687         if (err)
10688                 return err;
10689
10690         memmove(aux_data + off, aux_data + off + cnt,
10691                 sizeof(*aux_data) * (orig_prog_len - off - cnt));
10692
10693         return 0;
10694 }
10695
10696 /* The verifier does more data flow analysis than llvm and will not
10697  * explore branches that are dead at run time. Malicious programs can
10698  * have dead code too. Therefore replace all dead at-run-time code
10699  * with 'ja -1'.
10700  *
10701  * Just nops are not optimal, e.g. if they would sit at the end of the
10702  * program and through another bug we would manage to jump there, then
10703  * we'd execute beyond program memory otherwise. Returning exception
10704  * code also wouldn't work since we can have subprogs where the dead
10705  * code could be located.
10706  */
10707 static void sanitize_dead_code(struct bpf_verifier_env *env)
10708 {
10709         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10710         struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
10711         struct bpf_insn *insn = env->prog->insnsi;
10712         const int insn_cnt = env->prog->len;
10713         int i;
10714
10715         for (i = 0; i < insn_cnt; i++) {
10716                 if (aux_data[i].seen)
10717                         continue;
10718                 memcpy(insn + i, &trap, sizeof(trap));
10719                 aux_data[i].zext_dst = false;
10720         }
10721 }
10722
10723 static bool insn_is_cond_jump(u8 code)
10724 {
10725         u8 op;
10726
10727         if (BPF_CLASS(code) == BPF_JMP32)
10728                 return true;
10729
10730         if (BPF_CLASS(code) != BPF_JMP)
10731                 return false;
10732
10733         op = BPF_OP(code);
10734         return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
10735 }
10736
10737 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
10738 {
10739         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10740         struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10741         struct bpf_insn *insn = env->prog->insnsi;
10742         const int insn_cnt = env->prog->len;
10743         int i;
10744
10745         for (i = 0; i < insn_cnt; i++, insn++) {
10746                 if (!insn_is_cond_jump(insn->code))
10747                         continue;
10748
10749                 if (!aux_data[i + 1].seen)
10750                         ja.off = insn->off;
10751                 else if (!aux_data[i + 1 + insn->off].seen)
10752                         ja.off = 0;
10753                 else
10754                         continue;
10755
10756                 if (bpf_prog_is_dev_bound(env->prog->aux))
10757                         bpf_prog_offload_replace_insn(env, i, &ja);
10758
10759                 memcpy(insn, &ja, sizeof(ja));
10760         }
10761 }
10762
10763 static int opt_remove_dead_code(struct bpf_verifier_env *env)
10764 {
10765         struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10766         int insn_cnt = env->prog->len;
10767         int i, err;
10768
10769         for (i = 0; i < insn_cnt; i++) {
10770                 int j;
10771
10772                 j = 0;
10773                 while (i + j < insn_cnt && !aux_data[i + j].seen)
10774                         j++;
10775                 if (!j)
10776                         continue;
10777
10778                 err = verifier_remove_insns(env, i, j);
10779                 if (err)
10780                         return err;
10781                 insn_cnt = env->prog->len;
10782         }
10783
10784         return 0;
10785 }
10786
10787 static int opt_remove_nops(struct bpf_verifier_env *env)
10788 {
10789         const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10790         struct bpf_insn *insn = env->prog->insnsi;
10791         int insn_cnt = env->prog->len;
10792         int i, err;
10793
10794         for (i = 0; i < insn_cnt; i++) {
10795                 if (memcmp(&insn[i], &ja, sizeof(ja)))
10796                         continue;
10797
10798                 err = verifier_remove_insns(env, i, 1);
10799                 if (err)
10800                         return err;
10801                 insn_cnt--;
10802                 i--;
10803         }
10804
10805         return 0;
10806 }
10807
10808 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
10809                                          const union bpf_attr *attr)
10810 {
10811         struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
10812         struct bpf_insn_aux_data *aux = env->insn_aux_data;
10813         int i, patch_len, delta = 0, len = env->prog->len;
10814         struct bpf_insn *insns = env->prog->insnsi;
10815         struct bpf_prog *new_prog;
10816         bool rnd_hi32;
10817
10818         rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
10819         zext_patch[1] = BPF_ZEXT_REG(0);
10820         rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
10821         rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
10822         rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
10823         for (i = 0; i < len; i++) {
10824                 int adj_idx = i + delta;
10825                 struct bpf_insn insn;
10826
10827                 insn = insns[adj_idx];
10828                 if (!aux[adj_idx].zext_dst) {
10829                         u8 code, class;
10830                         u32 imm_rnd;
10831
10832                         if (!rnd_hi32)
10833                                 continue;
10834
10835                         code = insn.code;
10836                         class = BPF_CLASS(code);
10837                         if (insn_no_def(&insn))
10838                                 continue;
10839
10840                         /* NOTE: arg "reg" (the fourth one) is only used for
10841                          *       BPF_STX which has been ruled out in above
10842                          *       check, it is safe to pass NULL here.
10843                          */
10844                         if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
10845                                 if (class == BPF_LD &&
10846                                     BPF_MODE(code) == BPF_IMM)
10847                                         i++;
10848                                 continue;
10849                         }
10850
10851                         /* ctx load could be transformed into wider load. */
10852                         if (class == BPF_LDX &&
10853                             aux[adj_idx].ptr_type == PTR_TO_CTX)
10854                                 continue;
10855
10856                         imm_rnd = get_random_int();
10857                         rnd_hi32_patch[0] = insn;
10858                         rnd_hi32_patch[1].imm = imm_rnd;
10859                         rnd_hi32_patch[3].dst_reg = insn.dst_reg;
10860                         patch = rnd_hi32_patch;
10861                         patch_len = 4;
10862                         goto apply_patch_buffer;
10863                 }
10864
10865                 if (!bpf_jit_needs_zext())
10866                         continue;
10867
10868                 zext_patch[0] = insn;
10869                 zext_patch[1].dst_reg = insn.dst_reg;
10870                 zext_patch[1].src_reg = insn.dst_reg;
10871                 patch = zext_patch;
10872                 patch_len = 2;
10873 apply_patch_buffer:
10874                 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
10875                 if (!new_prog)
10876                         return -ENOMEM;
10877                 env->prog = new_prog;
10878                 insns = new_prog->insnsi;
10879                 aux = env->insn_aux_data;
10880                 delta += patch_len - 1;
10881         }
10882
10883         return 0;
10884 }
10885
10886 /* convert load instructions that access fields of a context type into a
10887  * sequence of instructions that access fields of the underlying structure:
10888  *     struct __sk_buff    -> struct sk_buff
10889  *     struct bpf_sock_ops -> struct sock
10890  */
10891 static int convert_ctx_accesses(struct bpf_verifier_env *env)
10892 {
10893         const struct bpf_verifier_ops *ops = env->ops;
10894         int i, cnt, size, ctx_field_size, delta = 0;
10895         const int insn_cnt = env->prog->len;
10896         struct bpf_insn insn_buf[16], *insn;
10897         u32 target_size, size_default, off;
10898         struct bpf_prog *new_prog;
10899         enum bpf_access_type type;
10900         bool is_narrower_load;
10901
10902         if (ops->gen_prologue || env->seen_direct_write) {
10903                 if (!ops->gen_prologue) {
10904                         verbose(env, "bpf verifier is misconfigured\n");
10905                         return -EINVAL;
10906                 }
10907                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
10908                                         env->prog);
10909                 if (cnt >= ARRAY_SIZE(insn_buf)) {
10910                         verbose(env, "bpf verifier is misconfigured\n");
10911                         return -EINVAL;
10912                 } else if (cnt) {
10913                         new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
10914                         if (!new_prog)
10915                                 return -ENOMEM;
10916
10917                         env->prog = new_prog;
10918                         delta += cnt - 1;
10919                 }
10920         }
10921
10922         if (bpf_prog_is_dev_bound(env->prog->aux))
10923                 return 0;
10924
10925         insn = env->prog->insnsi + delta;
10926
10927         for (i = 0; i < insn_cnt; i++, insn++) {
10928                 bpf_convert_ctx_access_t convert_ctx_access;
10929                 bool ctx_access;
10930
10931                 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
10932                     insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
10933                     insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
10934                     insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
10935                         type = BPF_READ;
10936                         ctx_access = true;
10937                 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
10938                            insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
10939                            insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
10940                            insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
10941                            insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
10942                            insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
10943                            insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
10944                            insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
10945                         type = BPF_WRITE;
10946                         ctx_access = BPF_CLASS(insn->code) == BPF_STX;
10947                 } else {
10948                         continue;
10949                 }
10950
10951                 if (type == BPF_WRITE &&
10952                     env->insn_aux_data[i + delta].sanitize_stack_spill) {
10953                         struct bpf_insn patch[] = {
10954                                 *insn,
10955                                 BPF_ST_NOSPEC(),
10956                         };
10957
10958                         cnt = ARRAY_SIZE(patch);
10959                         new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
10960                         if (!new_prog)
10961                                 return -ENOMEM;
10962
10963                         delta    += cnt - 1;
10964                         env->prog = new_prog;
10965                         insn      = new_prog->insnsi + i + delta;
10966                         continue;
10967                 }
10968
10969                 if (!ctx_access)
10970                         continue;
10971
10972                 switch (env->insn_aux_data[i + delta].ptr_type) {
10973                 case PTR_TO_CTX:
10974                         if (!ops->convert_ctx_access)
10975                                 continue;
10976                         convert_ctx_access = ops->convert_ctx_access;
10977                         break;
10978                 case PTR_TO_SOCKET:
10979                 case PTR_TO_SOCK_COMMON:
10980                         convert_ctx_access = bpf_sock_convert_ctx_access;
10981                         break;
10982                 case PTR_TO_TCP_SOCK:
10983                         convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
10984                         break;
10985                 case PTR_TO_XDP_SOCK:
10986                         convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
10987                         break;
10988                 case PTR_TO_BTF_ID:
10989                         if (type == BPF_READ) {
10990                                 insn->code = BPF_LDX | BPF_PROBE_MEM |
10991                                         BPF_SIZE((insn)->code);
10992                                 env->prog->aux->num_exentries++;
10993                         } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
10994                                 verbose(env, "Writes through BTF pointers are not allowed\n");
10995                                 return -EINVAL;
10996                         }
10997                         continue;
10998                 default:
10999                         continue;
11000                 }
11001
11002                 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
11003                 size = BPF_LDST_BYTES(insn);
11004
11005                 /* If the read access is a narrower load of the field,
11006                  * convert to a 4/8-byte load, to minimum program type specific
11007                  * convert_ctx_access changes. If conversion is successful,
11008                  * we will apply proper mask to the result.
11009                  */
11010                 is_narrower_load = size < ctx_field_size;
11011                 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
11012                 off = insn->off;
11013                 if (is_narrower_load) {
11014                         u8 size_code;
11015
11016                         if (type == BPF_WRITE) {
11017                                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
11018                                 return -EINVAL;
11019                         }
11020
11021                         size_code = BPF_H;
11022                         if (ctx_field_size == 4)
11023                                 size_code = BPF_W;
11024                         else if (ctx_field_size == 8)
11025                                 size_code = BPF_DW;
11026
11027                         insn->off = off & ~(size_default - 1);
11028                         insn->code = BPF_LDX | BPF_MEM | size_code;
11029                 }
11030
11031                 target_size = 0;
11032                 cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
11033                                          &target_size);
11034                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
11035                     (ctx_field_size && !target_size)) {
11036                         verbose(env, "bpf verifier is misconfigured\n");
11037                         return -EINVAL;
11038                 }
11039
11040                 if (is_narrower_load && size < target_size) {
11041                         u8 shift = bpf_ctx_narrow_access_offset(
11042                                 off, size, size_default) * 8;
11043                         if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
11044                                 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
11045                                 return -EINVAL;
11046                         }
11047                         if (ctx_field_size <= 4) {
11048                                 if (shift)
11049                                         insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
11050                                                                         insn->dst_reg,
11051                                                                         shift);
11052                                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
11053                                                                 (1 << size * 8) - 1);
11054                         } else {
11055                                 if (shift)
11056                                         insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
11057                                                                         insn->dst_reg,
11058                                                                         shift);
11059                                 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
11060                                                                 (1ULL << size * 8) - 1);
11061                         }
11062                 }
11063
11064                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11065                 if (!new_prog)
11066                         return -ENOMEM;
11067
11068                 delta += cnt - 1;
11069
11070                 /* keep walking new program and skip insns we just inserted */
11071                 env->prog = new_prog;
11072                 insn      = new_prog->insnsi + i + delta;
11073         }
11074
11075         return 0;
11076 }
11077
11078 static int jit_subprogs(struct bpf_verifier_env *env)
11079 {
11080         struct bpf_prog *prog = env->prog, **func, *tmp;
11081         int i, j, subprog_start, subprog_end = 0, len, subprog;
11082         struct bpf_map *map_ptr;
11083         struct bpf_insn *insn;
11084         void *old_bpf_func;
11085         int err, num_exentries;
11086
11087         if (env->subprog_cnt <= 1)
11088                 return 0;
11089
11090         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11091                 if (insn->code != (BPF_JMP | BPF_CALL) ||
11092                     insn->src_reg != BPF_PSEUDO_CALL)
11093                         continue;
11094                 /* Upon error here we cannot fall back to interpreter but
11095                  * need a hard reject of the program. Thus -EFAULT is
11096                  * propagated in any case.
11097                  */
11098                 subprog = find_subprog(env, i + insn->imm + 1);
11099                 if (subprog < 0) {
11100                         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
11101                                   i + insn->imm + 1);
11102                         return -EFAULT;
11103                 }
11104                 /* temporarily remember subprog id inside insn instead of
11105                  * aux_data, since next loop will split up all insns into funcs
11106                  */
11107                 insn->off = subprog;
11108                 /* remember original imm in case JIT fails and fallback
11109                  * to interpreter will be needed
11110                  */
11111                 env->insn_aux_data[i].call_imm = insn->imm;
11112                 /* point imm to __bpf_call_base+1 from JITs point of view */
11113                 insn->imm = 1;
11114         }
11115
11116         err = bpf_prog_alloc_jited_linfo(prog);
11117         if (err)
11118                 goto out_undo_insn;
11119
11120         err = -ENOMEM;
11121         func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
11122         if (!func)
11123                 goto out_undo_insn;
11124
11125         for (i = 0; i < env->subprog_cnt; i++) {
11126                 subprog_start = subprog_end;
11127                 subprog_end = env->subprog_info[i + 1].start;
11128
11129                 len = subprog_end - subprog_start;
11130                 /* BPF_PROG_RUN doesn't call subprogs directly,
11131                  * hence main prog stats include the runtime of subprogs.
11132                  * subprogs don't have IDs and not reachable via prog_get_next_id
11133                  * func[i]->aux->stats will never be accessed and stays NULL
11134                  */
11135                 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
11136                 if (!func[i])
11137                         goto out_free;
11138                 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
11139                        len * sizeof(struct bpf_insn));
11140                 func[i]->type = prog->type;
11141                 func[i]->len = len;
11142                 if (bpf_prog_calc_tag(func[i]))
11143                         goto out_free;
11144                 func[i]->is_func = 1;
11145                 func[i]->aux->func_idx = i;
11146                 /* Below members will be freed only at prog->aux */
11147                 func[i]->aux->btf = prog->aux->btf;
11148                 func[i]->aux->func_info = prog->aux->func_info;
11149                 func[i]->aux->poke_tab = prog->aux->poke_tab;
11150                 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
11151
11152                 for (j = 0; j < prog->aux->size_poke_tab; j++) {
11153                         struct bpf_jit_poke_descriptor *poke;
11154
11155                         poke = &prog->aux->poke_tab[j];
11156                         if (poke->insn_idx < subprog_end &&
11157                             poke->insn_idx >= subprog_start)
11158                                 poke->aux = func[i]->aux;
11159                 }
11160
11161                 /* Use bpf_prog_F_tag to indicate functions in stack traces.
11162                  * Long term would need debug info to populate names
11163                  */
11164                 func[i]->aux->name[0] = 'F';
11165                 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
11166                 func[i]->jit_requested = 1;
11167                 func[i]->aux->linfo = prog->aux->linfo;
11168                 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
11169                 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
11170                 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
11171                 num_exentries = 0;
11172                 insn = func[i]->insnsi;
11173                 for (j = 0; j < func[i]->len; j++, insn++) {
11174                         if (BPF_CLASS(insn->code) == BPF_LDX &&
11175                             BPF_MODE(insn->code) == BPF_PROBE_MEM)
11176                                 num_exentries++;
11177                 }
11178                 func[i]->aux->num_exentries = num_exentries;
11179                 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
11180                 func[i] = bpf_int_jit_compile(func[i]);
11181                 if (!func[i]->jited) {
11182                         err = -ENOTSUPP;
11183                         goto out_free;
11184                 }
11185                 cond_resched();
11186         }
11187
11188         /* at this point all bpf functions were successfully JITed
11189          * now populate all bpf_calls with correct addresses and
11190          * run last pass of JIT
11191          */
11192         for (i = 0; i < env->subprog_cnt; i++) {
11193                 insn = func[i]->insnsi;
11194                 for (j = 0; j < func[i]->len; j++, insn++) {
11195                         if (insn->code != (BPF_JMP | BPF_CALL) ||
11196                             insn->src_reg != BPF_PSEUDO_CALL)
11197                                 continue;
11198                         subprog = insn->off;
11199                         insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
11200                                     __bpf_call_base;
11201                 }
11202
11203                 /* we use the aux data to keep a list of the start addresses
11204                  * of the JITed images for each function in the program
11205                  *
11206                  * for some architectures, such as powerpc64, the imm field
11207                  * might not be large enough to hold the offset of the start
11208                  * address of the callee's JITed image from __bpf_call_base
11209                  *
11210                  * in such cases, we can lookup the start address of a callee
11211                  * by using its subprog id, available from the off field of
11212                  * the call instruction, as an index for this list
11213                  */
11214                 func[i]->aux->func = func;
11215                 func[i]->aux->func_cnt = env->subprog_cnt;
11216         }
11217         for (i = 0; i < env->subprog_cnt; i++) {
11218                 old_bpf_func = func[i]->bpf_func;
11219                 tmp = bpf_int_jit_compile(func[i]);
11220                 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
11221                         verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
11222                         err = -ENOTSUPP;
11223                         goto out_free;
11224                 }
11225                 cond_resched();
11226         }
11227
11228         /* finally lock prog and jit images for all functions and
11229          * populate kallsysm
11230          */
11231         for (i = 0; i < env->subprog_cnt; i++) {
11232                 bpf_prog_lock_ro(func[i]);
11233                 bpf_prog_kallsyms_add(func[i]);
11234         }
11235
11236         /* Last step: make now unused interpreter insns from main
11237          * prog consistent for later dump requests, so they can
11238          * later look the same as if they were interpreted only.
11239          */
11240         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11241                 if (insn->code != (BPF_JMP | BPF_CALL) ||
11242                     insn->src_reg != BPF_PSEUDO_CALL)
11243                         continue;
11244                 insn->off = env->insn_aux_data[i].call_imm;
11245                 subprog = find_subprog(env, i + insn->off + 1);
11246                 insn->imm = subprog;
11247         }
11248
11249         prog->jited = 1;
11250         prog->bpf_func = func[0]->bpf_func;
11251         prog->aux->func = func;
11252         prog->aux->func_cnt = env->subprog_cnt;
11253         bpf_prog_free_unused_jited_linfo(prog);
11254         return 0;
11255 out_free:
11256         /* We failed JIT'ing, so at this point we need to unregister poke
11257          * descriptors from subprogs, so that kernel is not attempting to
11258          * patch it anymore as we're freeing the subprog JIT memory.
11259          */
11260         for (i = 0; i < prog->aux->size_poke_tab; i++) {
11261                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
11262                 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
11263         }
11264         /* At this point we're guaranteed that poke descriptors are not
11265          * live anymore. We can just unlink its descriptor table as it's
11266          * released with the main prog.
11267          */
11268         for (i = 0; i < env->subprog_cnt; i++) {
11269                 if (!func[i])
11270                         continue;
11271                 func[i]->aux->poke_tab = NULL;
11272                 bpf_jit_free(func[i]);
11273         }
11274         kfree(func);
11275 out_undo_insn:
11276         /* cleanup main prog to be interpreted */
11277         prog->jit_requested = 0;
11278         for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11279                 if (insn->code != (BPF_JMP | BPF_CALL) ||
11280                     insn->src_reg != BPF_PSEUDO_CALL)
11281                         continue;
11282                 insn->off = 0;
11283                 insn->imm = env->insn_aux_data[i].call_imm;
11284         }
11285         bpf_prog_free_jited_linfo(prog);
11286         return err;
11287 }
11288
11289 static int fixup_call_args(struct bpf_verifier_env *env)
11290 {
11291 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11292         struct bpf_prog *prog = env->prog;
11293         struct bpf_insn *insn = prog->insnsi;
11294         int i, depth;
11295 #endif
11296         int err = 0;
11297
11298         if (env->prog->jit_requested &&
11299             !bpf_prog_is_dev_bound(env->prog->aux)) {
11300                 err = jit_subprogs(env);
11301                 if (err == 0)
11302                         return 0;
11303                 if (err == -EFAULT)
11304                         return err;
11305         }
11306 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11307         if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
11308                 /* When JIT fails the progs with bpf2bpf calls and tail_calls
11309                  * have to be rejected, since interpreter doesn't support them yet.
11310                  */
11311                 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
11312                 return -EINVAL;
11313         }
11314         for (i = 0; i < prog->len; i++, insn++) {
11315                 if (insn->code != (BPF_JMP | BPF_CALL) ||
11316                     insn->src_reg != BPF_PSEUDO_CALL)
11317                         continue;
11318                 depth = get_callee_stack_depth(env, insn, i);
11319                 if (depth < 0)
11320                         return depth;
11321                 bpf_patch_call_args(insn, depth);
11322         }
11323         err = 0;
11324 #endif
11325         return err;
11326 }
11327
11328 /* fixup insn->imm field of bpf_call instructions
11329  * and inline eligible helpers as explicit sequence of BPF instructions
11330  *
11331  * this function is called after eBPF program passed verification
11332  */
11333 static int fixup_bpf_calls(struct bpf_verifier_env *env)
11334 {
11335         struct bpf_prog *prog = env->prog;
11336         bool expect_blinding = bpf_jit_blinding_enabled(prog);
11337         struct bpf_insn *insn = prog->insnsi;
11338         const struct bpf_func_proto *fn;
11339         const int insn_cnt = prog->len;
11340         const struct bpf_map_ops *ops;
11341         struct bpf_insn_aux_data *aux;
11342         struct bpf_insn insn_buf[16];
11343         struct bpf_prog *new_prog;
11344         struct bpf_map *map_ptr;
11345         int i, ret, cnt, delta = 0;
11346
11347         for (i = 0; i < insn_cnt; i++, insn++) {
11348                 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
11349                     insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
11350                     insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
11351                     insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
11352                         bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
11353                         bool isdiv = BPF_OP(insn->code) == BPF_DIV;
11354                         struct bpf_insn *patchlet;
11355                         struct bpf_insn chk_and_div[] = {
11356                                 /* [R,W]x div 0 -> 0 */
11357                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
11358                                              BPF_JNE | BPF_K, insn->src_reg,
11359                                              0, 2, 0),
11360                                 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
11361                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11362                                 *insn,
11363                         };
11364                         struct bpf_insn chk_and_mod[] = {
11365                                 /* [R,W]x mod 0 -> [R,W]x */
11366                                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
11367                                              BPF_JEQ | BPF_K, insn->src_reg,
11368                                              0, 1 + (is64 ? 0 : 1), 0),
11369                                 *insn,
11370                                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11371                                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
11372                         };
11373
11374                         patchlet = isdiv ? chk_and_div : chk_and_mod;
11375                         cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
11376                                       ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
11377
11378                         new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
11379                         if (!new_prog)
11380                                 return -ENOMEM;
11381
11382                         delta    += cnt - 1;
11383                         env->prog = prog = new_prog;
11384                         insn      = new_prog->insnsi + i + delta;
11385                         continue;
11386                 }
11387
11388                 if (BPF_CLASS(insn->code) == BPF_LD &&
11389                     (BPF_MODE(insn->code) == BPF_ABS ||
11390                      BPF_MODE(insn->code) == BPF_IND)) {
11391                         cnt = env->ops->gen_ld_abs(insn, insn_buf);
11392                         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11393                                 verbose(env, "bpf verifier is misconfigured\n");
11394                                 return -EINVAL;
11395                         }
11396
11397                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11398                         if (!new_prog)
11399                                 return -ENOMEM;
11400
11401                         delta    += cnt - 1;
11402                         env->prog = prog = new_prog;
11403                         insn      = new_prog->insnsi + i + delta;
11404                         continue;
11405                 }
11406
11407                 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
11408                     insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
11409                         const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
11410                         const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
11411                         struct bpf_insn insn_buf[16];
11412                         struct bpf_insn *patch = &insn_buf[0];
11413                         bool issrc, isneg, isimm;
11414                         u32 off_reg;
11415
11416                         aux = &env->insn_aux_data[i + delta];
11417                         if (!aux->alu_state ||
11418                             aux->alu_state == BPF_ALU_NON_POINTER)
11419                                 continue;
11420
11421                         isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
11422                         issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
11423                                 BPF_ALU_SANITIZE_SRC;
11424                         isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
11425
11426                         off_reg = issrc ? insn->src_reg : insn->dst_reg;
11427                         if (isimm) {
11428                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11429                         } else {
11430                                 if (isneg)
11431                                         *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11432                                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11433                                 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
11434                                 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
11435                                 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
11436                                 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
11437                                 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
11438                         }
11439                         if (!issrc)
11440                                 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
11441                         insn->src_reg = BPF_REG_AX;
11442                         if (isneg)
11443                                 insn->code = insn->code == code_add ?
11444                                              code_sub : code_add;
11445                         *patch++ = *insn;
11446                         if (issrc && isneg && !isimm)
11447                                 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11448                         cnt = patch - insn_buf;
11449
11450                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11451                         if (!new_prog)
11452                                 return -ENOMEM;
11453
11454                         delta    += cnt - 1;
11455                         env->prog = prog = new_prog;
11456                         insn      = new_prog->insnsi + i + delta;
11457                         continue;
11458                 }
11459
11460                 if (insn->code != (BPF_JMP | BPF_CALL))
11461                         continue;
11462                 if (insn->src_reg == BPF_PSEUDO_CALL)
11463                         continue;
11464
11465                 if (insn->imm == BPF_FUNC_get_route_realm)
11466                         prog->dst_needed = 1;
11467                 if (insn->imm == BPF_FUNC_get_prandom_u32)
11468                         bpf_user_rnd_init_once();
11469                 if (insn->imm == BPF_FUNC_override_return)
11470                         prog->kprobe_override = 1;
11471                 if (insn->imm == BPF_FUNC_tail_call) {
11472                         /* If we tail call into other programs, we
11473                          * cannot make any assumptions since they can
11474                          * be replaced dynamically during runtime in
11475                          * the program array.
11476                          */
11477                         prog->cb_access = 1;
11478                         if (!allow_tail_call_in_subprogs(env))
11479                                 prog->aux->stack_depth = MAX_BPF_STACK;
11480                         prog->aux->max_pkt_offset = MAX_PACKET_OFF;
11481
11482                         /* mark bpf_tail_call as different opcode to avoid
11483                          * conditional branch in the interpeter for every normal
11484                          * call and to prevent accidental JITing by JIT compiler
11485                          * that doesn't support bpf_tail_call yet
11486                          */
11487                         insn->imm = 0;
11488                         insn->code = BPF_JMP | BPF_TAIL_CALL;
11489
11490                         aux = &env->insn_aux_data[i + delta];
11491                         if (env->bpf_capable && !expect_blinding &&
11492                             prog->jit_requested &&
11493                             !bpf_map_key_poisoned(aux) &&
11494                             !bpf_map_ptr_poisoned(aux) &&
11495                             !bpf_map_ptr_unpriv(aux)) {
11496                                 struct bpf_jit_poke_descriptor desc = {
11497                                         .reason = BPF_POKE_REASON_TAIL_CALL,
11498                                         .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
11499                                         .tail_call.key = bpf_map_key_immediate(aux),
11500                                         .insn_idx = i + delta,
11501                                 };
11502
11503                                 ret = bpf_jit_add_poke_descriptor(prog, &desc);
11504                                 if (ret < 0) {
11505                                         verbose(env, "adding tail call poke descriptor failed\n");
11506                                         return ret;
11507                                 }
11508
11509                                 insn->imm = ret + 1;
11510                                 continue;
11511                         }
11512
11513                         if (!bpf_map_ptr_unpriv(aux))
11514                                 continue;
11515
11516                         /* instead of changing every JIT dealing with tail_call
11517                          * emit two extra insns:
11518                          * if (index >= max_entries) goto out;
11519                          * index &= array->index_mask;
11520                          * to avoid out-of-bounds cpu speculation
11521                          */
11522                         if (bpf_map_ptr_poisoned(aux)) {
11523                                 verbose(env, "tail_call abusing map_ptr\n");
11524                                 return -EINVAL;
11525                         }
11526
11527                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11528                         insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
11529                                                   map_ptr->max_entries, 2);
11530                         insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
11531                                                     container_of(map_ptr,
11532                                                                  struct bpf_array,
11533                                                                  map)->index_mask);
11534                         insn_buf[2] = *insn;
11535                         cnt = 3;
11536                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11537                         if (!new_prog)
11538                                 return -ENOMEM;
11539
11540                         delta    += cnt - 1;
11541                         env->prog = prog = new_prog;
11542                         insn      = new_prog->insnsi + i + delta;
11543                         continue;
11544                 }
11545
11546                 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
11547                  * and other inlining handlers are currently limited to 64 bit
11548                  * only.
11549                  */
11550                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
11551                     (insn->imm == BPF_FUNC_map_lookup_elem ||
11552                      insn->imm == BPF_FUNC_map_update_elem ||
11553                      insn->imm == BPF_FUNC_map_delete_elem ||
11554                      insn->imm == BPF_FUNC_map_push_elem   ||
11555                      insn->imm == BPF_FUNC_map_pop_elem    ||
11556                      insn->imm == BPF_FUNC_map_peek_elem)) {
11557                         aux = &env->insn_aux_data[i + delta];
11558                         if (bpf_map_ptr_poisoned(aux))
11559                                 goto patch_call_imm;
11560
11561                         map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11562                         ops = map_ptr->ops;
11563                         if (insn->imm == BPF_FUNC_map_lookup_elem &&
11564                             ops->map_gen_lookup) {
11565                                 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
11566                                 if (cnt == -EOPNOTSUPP)
11567                                         goto patch_map_ops_generic;
11568                                 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11569                                         verbose(env, "bpf verifier is misconfigured\n");
11570                                         return -EINVAL;
11571                                 }
11572
11573                                 new_prog = bpf_patch_insn_data(env, i + delta,
11574                                                                insn_buf, cnt);
11575                                 if (!new_prog)
11576                                         return -ENOMEM;
11577
11578                                 delta    += cnt - 1;
11579                                 env->prog = prog = new_prog;
11580                                 insn      = new_prog->insnsi + i + delta;
11581                                 continue;
11582                         }
11583
11584                         BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
11585                                      (void *(*)(struct bpf_map *map, void *key))NULL));
11586                         BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
11587                                      (int (*)(struct bpf_map *map, void *key))NULL));
11588                         BUILD_BUG_ON(!__same_type(ops->map_update_elem,
11589                                      (int (*)(struct bpf_map *map, void *key, void *value,
11590                                               u64 flags))NULL));
11591                         BUILD_BUG_ON(!__same_type(ops->map_push_elem,
11592                                      (int (*)(struct bpf_map *map, void *value,
11593                                               u64 flags))NULL));
11594                         BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
11595                                      (int (*)(struct bpf_map *map, void *value))NULL));
11596                         BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
11597                                      (int (*)(struct bpf_map *map, void *value))NULL));
11598 patch_map_ops_generic:
11599                         switch (insn->imm) {
11600                         case BPF_FUNC_map_lookup_elem:
11601                                 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
11602                                             __bpf_call_base;
11603                                 continue;
11604                         case BPF_FUNC_map_update_elem:
11605                                 insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
11606                                             __bpf_call_base;
11607                                 continue;
11608                         case BPF_FUNC_map_delete_elem:
11609                                 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
11610                                             __bpf_call_base;
11611                                 continue;
11612                         case BPF_FUNC_map_push_elem:
11613                                 insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
11614                                             __bpf_call_base;
11615                                 continue;
11616                         case BPF_FUNC_map_pop_elem:
11617                                 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
11618                                             __bpf_call_base;
11619                                 continue;
11620                         case BPF_FUNC_map_peek_elem:
11621                                 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
11622                                             __bpf_call_base;
11623                                 continue;
11624                         }
11625
11626                         goto patch_call_imm;
11627                 }
11628
11629                 if (prog->jit_requested && BITS_PER_LONG == 64 &&
11630                     insn->imm == BPF_FUNC_jiffies64) {
11631                         struct bpf_insn ld_jiffies_addr[2] = {
11632                                 BPF_LD_IMM64(BPF_REG_0,
11633                                              (unsigned long)&jiffies),
11634                         };
11635
11636                         insn_buf[0] = ld_jiffies_addr[0];
11637                         insn_buf[1] = ld_jiffies_addr[1];
11638                         insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
11639                                                   BPF_REG_0, 0);
11640                         cnt = 3;
11641
11642                         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
11643                                                        cnt);
11644                         if (!new_prog)
11645                                 return -ENOMEM;
11646
11647                         delta    += cnt - 1;
11648                         env->prog = prog = new_prog;
11649                         insn      = new_prog->insnsi + i + delta;
11650                         continue;
11651                 }
11652
11653 patch_call_imm:
11654                 fn = env->ops->get_func_proto(insn->imm, env->prog);
11655                 /* all functions that have prototype and verifier allowed
11656                  * programs to call them, must be real in-kernel functions
11657                  */
11658                 if (!fn->func) {
11659                         verbose(env,
11660                                 "kernel subsystem misconfigured func %s#%d\n",
11661                                 func_id_name(insn->imm), insn->imm);
11662                         return -EFAULT;
11663                 }
11664                 insn->imm = fn->func - __bpf_call_base;
11665         }
11666
11667         /* Since poke tab is now finalized, publish aux to tracker. */
11668         for (i = 0; i < prog->aux->size_poke_tab; i++) {
11669                 map_ptr = prog->aux->poke_tab[i].tail_call.map;
11670                 if (!map_ptr->ops->map_poke_track ||
11671                     !map_ptr->ops->map_poke_untrack ||
11672                     !map_ptr->ops->map_poke_run) {
11673                         verbose(env, "bpf verifier is misconfigured\n");
11674                         return -EINVAL;
11675                 }
11676
11677                 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
11678                 if (ret < 0) {
11679                         verbose(env, "tracking tail call prog failed\n");
11680                         return ret;
11681                 }
11682         }
11683
11684         return 0;
11685 }
11686
11687 static void free_states(struct bpf_verifier_env *env)
11688 {
11689         struct bpf_verifier_state_list *sl, *sln;
11690         int i;
11691
11692         sl = env->free_list;
11693         while (sl) {
11694                 sln = sl->next;
11695                 free_verifier_state(&sl->state, false);
11696                 kfree(sl);
11697                 sl = sln;
11698         }
11699         env->free_list = NULL;
11700
11701         if (!env->explored_states)
11702                 return;
11703
11704         for (i = 0; i < state_htab_size(env); i++) {
11705                 sl = env->explored_states[i];
11706
11707                 while (sl) {
11708                         sln = sl->next;
11709                         free_verifier_state(&sl->state, false);
11710                         kfree(sl);
11711                         sl = sln;
11712                 }
11713                 env->explored_states[i] = NULL;
11714         }
11715 }
11716
11717 static int do_check_common(struct bpf_verifier_env *env, int subprog)
11718 {
11719         bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11720         struct bpf_verifier_state *state;
11721         struct bpf_reg_state *regs;
11722         int ret, i;
11723
11724         env->prev_linfo = NULL;
11725         env->pass_cnt++;
11726
11727         state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
11728         if (!state)
11729                 return -ENOMEM;
11730         state->curframe = 0;
11731         state->speculative = false;
11732         state->branches = 1;
11733         state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
11734         if (!state->frame[0]) {
11735                 kfree(state);
11736                 return -ENOMEM;
11737         }
11738         env->cur_state = state;
11739         init_func_state(env, state->frame[0],
11740                         BPF_MAIN_FUNC /* callsite */,
11741                         0 /* frameno */,
11742                         subprog);
11743
11744         regs = state->frame[state->curframe]->regs;
11745         if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
11746                 ret = btf_prepare_func_args(env, subprog, regs);
11747                 if (ret)
11748                         goto out;
11749                 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
11750                         if (regs[i].type == PTR_TO_CTX)
11751                                 mark_reg_known_zero(env, regs, i);
11752                         else if (regs[i].type == SCALAR_VALUE)
11753                                 mark_reg_unknown(env, regs, i);
11754                 }
11755         } else {
11756                 /* 1st arg to a function */
11757                 regs[BPF_REG_1].type = PTR_TO_CTX;
11758                 mark_reg_known_zero(env, regs, BPF_REG_1);
11759                 ret = btf_check_func_arg_match(env, subprog, regs);
11760                 if (ret == -EFAULT)
11761                         /* unlikely verifier bug. abort.
11762                          * ret == 0 and ret < 0 are sadly acceptable for
11763                          * main() function due to backward compatibility.
11764                          * Like socket filter program may be written as:
11765                          * int bpf_prog(struct pt_regs *ctx)
11766                          * and never dereference that ctx in the program.
11767                          * 'struct pt_regs' is a type mismatch for socket
11768                          * filter that should be using 'struct __sk_buff'.
11769                          */
11770                         goto out;
11771         }
11772
11773         ret = do_check(env);
11774 out:
11775         /* check for NULL is necessary, since cur_state can be freed inside
11776          * do_check() under memory pressure.
11777          */
11778         if (env->cur_state) {
11779                 free_verifier_state(env->cur_state, true);
11780                 env->cur_state = NULL;
11781         }
11782         while (!pop_stack(env, NULL, NULL, false));
11783         if (!ret && pop_log)
11784                 bpf_vlog_reset(&env->log, 0);
11785         free_states(env);
11786         return ret;
11787 }
11788
11789 /* Verify all global functions in a BPF program one by one based on their BTF.
11790  * All global functions must pass verification. Otherwise the whole program is rejected.
11791  * Consider:
11792  * int bar(int);
11793  * int foo(int f)
11794  * {
11795  *    return bar(f);
11796  * }
11797  * int bar(int b)
11798  * {
11799  *    ...
11800  * }
11801  * foo() will be verified first for R1=any_scalar_value. During verification it
11802  * will be assumed that bar() already verified successfully and call to bar()
11803  * from foo() will be checked for type match only. Later bar() will be verified
11804  * independently to check that it's safe for R1=any_scalar_value.
11805  */
11806 static int do_check_subprogs(struct bpf_verifier_env *env)
11807 {
11808         struct bpf_prog_aux *aux = env->prog->aux;
11809         int i, ret;
11810
11811         if (!aux->func_info)
11812                 return 0;
11813
11814         for (i = 1; i < env->subprog_cnt; i++) {
11815                 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
11816                         continue;
11817                 env->insn_idx = env->subprog_info[i].start;
11818                 WARN_ON_ONCE(env->insn_idx == 0);
11819                 ret = do_check_common(env, i);
11820                 if (ret) {
11821                         return ret;
11822                 } else if (env->log.level & BPF_LOG_LEVEL) {
11823                         verbose(env,
11824                                 "Func#%d is safe for any args that match its prototype\n",
11825                                 i);
11826                 }
11827         }
11828         return 0;
11829 }
11830
11831 static int do_check_main(struct bpf_verifier_env *env)
11832 {
11833         int ret;
11834
11835         env->insn_idx = 0;
11836         ret = do_check_common(env, 0);
11837         if (!ret)
11838                 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
11839         return ret;
11840 }
11841
11842
11843 static void print_verification_stats(struct bpf_verifier_env *env)
11844 {
11845         int i;
11846
11847         if (env->log.level & BPF_LOG_STATS) {
11848                 verbose(env, "verification time %lld usec\n",
11849                         div_u64(env->verification_time, 1000));
11850                 verbose(env, "stack depth ");
11851                 for (i = 0; i < env->subprog_cnt; i++) {
11852                         u32 depth = env->subprog_info[i].stack_depth;
11853
11854                         verbose(env, "%d", depth);
11855                         if (i + 1 < env->subprog_cnt)
11856                                 verbose(env, "+");
11857                 }
11858                 verbose(env, "\n");
11859         }
11860         verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
11861                 "total_states %d peak_states %d mark_read %d\n",
11862                 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
11863                 env->max_states_per_insn, env->total_states,
11864                 env->peak_states, env->longest_mark_read_walk);
11865 }
11866
11867 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
11868 {
11869         const struct btf_type *t, *func_proto;
11870         const struct bpf_struct_ops *st_ops;
11871         const struct btf_member *member;
11872         struct bpf_prog *prog = env->prog;
11873         u32 btf_id, member_idx;
11874         const char *mname;
11875
11876         if (!prog->gpl_compatible) {
11877                 verbose(env, "struct ops programs must have a GPL compatible license\n");
11878                 return -EINVAL;
11879         }
11880
11881         btf_id = prog->aux->attach_btf_id;
11882         st_ops = bpf_struct_ops_find(btf_id);
11883         if (!st_ops) {
11884                 verbose(env, "attach_btf_id %u is not a supported struct\n",
11885                         btf_id);
11886                 return -ENOTSUPP;
11887         }
11888
11889         t = st_ops->type;
11890         member_idx = prog->expected_attach_type;
11891         if (member_idx >= btf_type_vlen(t)) {
11892                 verbose(env, "attach to invalid member idx %u of struct %s\n",
11893                         member_idx, st_ops->name);
11894                 return -EINVAL;
11895         }
11896
11897         member = &btf_type_member(t)[member_idx];
11898         mname = btf_name_by_offset(btf_vmlinux, member->name_off);
11899         func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
11900                                                NULL);
11901         if (!func_proto) {
11902                 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
11903                         mname, member_idx, st_ops->name);
11904                 return -EINVAL;
11905         }
11906
11907         if (st_ops->check_member) {
11908                 int err = st_ops->check_member(t, member);
11909
11910                 if (err) {
11911                         verbose(env, "attach to unsupported member %s of struct %s\n",
11912                                 mname, st_ops->name);
11913                         return err;
11914                 }
11915         }
11916
11917         prog->aux->attach_func_proto = func_proto;
11918         prog->aux->attach_func_name = mname;
11919         env->ops = st_ops->verifier_ops;
11920
11921         return 0;
11922 }
11923 #define SECURITY_PREFIX "security_"
11924
11925 static int check_attach_modify_return(unsigned long addr, const char *func_name)
11926 {
11927         if (within_error_injection_list(addr) ||
11928             !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
11929                 return 0;
11930
11931         return -EINVAL;
11932 }
11933
11934 /* non exhaustive list of sleepable bpf_lsm_*() functions */
11935 BTF_SET_START(btf_sleepable_lsm_hooks)
11936 #ifdef CONFIG_BPF_LSM
11937 BTF_ID(func, bpf_lsm_bprm_committed_creds)
11938 #else
11939 BTF_ID_UNUSED
11940 #endif
11941 BTF_SET_END(btf_sleepable_lsm_hooks)
11942
11943 static int check_sleepable_lsm_hook(u32 btf_id)
11944 {
11945         return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
11946 }
11947
11948 /* list of non-sleepable functions that are otherwise on
11949  * ALLOW_ERROR_INJECTION list
11950  */
11951 BTF_SET_START(btf_non_sleepable_error_inject)
11952 /* Three functions below can be called from sleepable and non-sleepable context.
11953  * Assume non-sleepable from bpf safety point of view.
11954  */
11955 BTF_ID(func, __add_to_page_cache_locked)
11956 BTF_ID(func, should_fail_alloc_page)
11957 BTF_ID(func, should_failslab)
11958 BTF_SET_END(btf_non_sleepable_error_inject)
11959
11960 static int check_non_sleepable_error_inject(u32 btf_id)
11961 {
11962         return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
11963 }
11964
11965 int bpf_check_attach_target(struct bpf_verifier_log *log,
11966                             const struct bpf_prog *prog,
11967                             const struct bpf_prog *tgt_prog,
11968                             u32 btf_id,
11969                             struct bpf_attach_target_info *tgt_info)
11970 {
11971         bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
11972         const char prefix[] = "btf_trace_";
11973         int ret = 0, subprog = -1, i;
11974         const struct btf_type *t;
11975         bool conservative = true;
11976         const char *tname;
11977         struct btf *btf;
11978         long addr = 0;
11979
11980         if (!btf_id) {
11981                 bpf_log(log, "Tracing programs must provide btf_id\n");
11982                 return -EINVAL;
11983         }
11984         btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
11985         if (!btf) {
11986                 bpf_log(log,
11987                         "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
11988                 return -EINVAL;
11989         }
11990         t = btf_type_by_id(btf, btf_id);
11991         if (!t) {
11992                 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
11993                 return -EINVAL;
11994         }
11995         tname = btf_name_by_offset(btf, t->name_off);
11996         if (!tname) {
11997                 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
11998                 return -EINVAL;
11999         }
12000         if (tgt_prog) {
12001                 struct bpf_prog_aux *aux = tgt_prog->aux;
12002
12003                 for (i = 0; i < aux->func_info_cnt; i++)
12004                         if (aux->func_info[i].type_id == btf_id) {
12005                                 subprog = i;
12006                                 break;
12007                         }
12008                 if (subprog == -1) {
12009                         bpf_log(log, "Subprog %s doesn't exist\n", tname);
12010                         return -EINVAL;
12011                 }
12012                 conservative = aux->func_info_aux[subprog].unreliable;
12013                 if (prog_extension) {
12014                         if (conservative) {
12015                                 bpf_log(log,
12016                                         "Cannot replace static functions\n");
12017                                 return -EINVAL;
12018                         }
12019                         if (!prog->jit_requested) {
12020                                 bpf_log(log,
12021                                         "Extension programs should be JITed\n");
12022                                 return -EINVAL;
12023                         }
12024                 }
12025                 if (!tgt_prog->jited) {
12026                         bpf_log(log, "Can attach to only JITed progs\n");
12027                         return -EINVAL;
12028                 }
12029                 if (tgt_prog->type == prog->type) {
12030                         /* Cannot fentry/fexit another fentry/fexit program.
12031                          * Cannot attach program extension to another extension.
12032                          * It's ok to attach fentry/fexit to extension program.
12033                          */
12034                         bpf_log(log, "Cannot recursively attach\n");
12035                         return -EINVAL;
12036                 }
12037                 if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
12038                     prog_extension &&
12039                     (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
12040                      tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
12041                         /* Program extensions can extend all program types
12042                          * except fentry/fexit. The reason is the following.
12043                          * The fentry/fexit programs are used for performance
12044                          * analysis, stats and can be attached to any program
12045                          * type except themselves. When extension program is
12046                          * replacing XDP function it is necessary to allow
12047                          * performance analysis of all functions. Both original
12048                          * XDP program and its program extension. Hence
12049                          * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
12050                          * allowed. If extending of fentry/fexit was allowed it
12051                          * would be possible to create long call chain
12052                          * fentry->extension->fentry->extension beyond
12053                          * reasonable stack size. Hence extending fentry is not
12054                          * allowed.
12055                          */
12056                         bpf_log(log, "Cannot extend fentry/fexit\n");
12057                         return -EINVAL;
12058                 }
12059         } else {
12060                 if (prog_extension) {
12061                         bpf_log(log, "Cannot replace kernel functions\n");
12062                         return -EINVAL;
12063                 }
12064         }
12065
12066         switch (prog->expected_attach_type) {
12067         case BPF_TRACE_RAW_TP:
12068                 if (tgt_prog) {
12069                         bpf_log(log,
12070                                 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
12071                         return -EINVAL;
12072                 }
12073                 if (!btf_type_is_typedef(t)) {
12074                         bpf_log(log, "attach_btf_id %u is not a typedef\n",
12075                                 btf_id);
12076                         return -EINVAL;
12077                 }
12078                 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
12079                         bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
12080                                 btf_id, tname);
12081                         return -EINVAL;
12082                 }
12083                 tname += sizeof(prefix) - 1;
12084                 t = btf_type_by_id(btf, t->type);
12085                 if (!btf_type_is_ptr(t))
12086                         /* should never happen in valid vmlinux build */
12087                         return -EINVAL;
12088                 t = btf_type_by_id(btf, t->type);
12089                 if (!btf_type_is_func_proto(t))
12090                         /* should never happen in valid vmlinux build */
12091                         return -EINVAL;
12092
12093                 break;
12094         case BPF_TRACE_ITER:
12095                 if (!btf_type_is_func(t)) {
12096                         bpf_log(log, "attach_btf_id %u is not a function\n",
12097                                 btf_id);
12098                         return -EINVAL;
12099                 }
12100                 t = btf_type_by_id(btf, t->type);
12101                 if (!btf_type_is_func_proto(t))
12102                         return -EINVAL;
12103                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
12104                 if (ret)
12105                         return ret;
12106                 break;
12107         default:
12108                 if (!prog_extension)
12109                         return -EINVAL;
12110                 fallthrough;
12111         case BPF_MODIFY_RETURN:
12112         case BPF_LSM_MAC:
12113         case BPF_TRACE_FENTRY:
12114         case BPF_TRACE_FEXIT:
12115                 if (!btf_type_is_func(t)) {
12116                         bpf_log(log, "attach_btf_id %u is not a function\n",
12117                                 btf_id);
12118                         return -EINVAL;
12119                 }
12120                 if (prog_extension &&
12121                     btf_check_type_match(log, prog, btf, t))
12122                         return -EINVAL;
12123                 t = btf_type_by_id(btf, t->type);
12124                 if (!btf_type_is_func_proto(t))
12125                         return -EINVAL;
12126
12127                 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
12128                     (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
12129                      prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
12130                         return -EINVAL;
12131
12132                 if (tgt_prog && conservative)
12133                         t = NULL;
12134
12135                 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
12136                 if (ret < 0)
12137                         return ret;
12138
12139                 if (tgt_prog) {
12140                         if (subprog == 0)
12141                                 addr = (long) tgt_prog->bpf_func;
12142                         else
12143                                 addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
12144                 } else {
12145                         addr = kallsyms_lookup_name(tname);
12146                         if (!addr) {
12147                                 bpf_log(log,
12148                                         "The address of function %s cannot be found\n",
12149                                         tname);
12150                                 return -ENOENT;
12151                         }
12152                 }
12153
12154                 if (prog->aux->sleepable) {
12155                         ret = -EINVAL;
12156                         switch (prog->type) {
12157                         case BPF_PROG_TYPE_TRACING:
12158                                 /* fentry/fexit/fmod_ret progs can be sleepable only if they are
12159                                  * attached to ALLOW_ERROR_INJECTION and are not in denylist.
12160                                  */
12161                                 if (!check_non_sleepable_error_inject(btf_id) &&
12162                                     within_error_injection_list(addr))
12163                                         ret = 0;
12164                                 break;
12165                         case BPF_PROG_TYPE_LSM:
12166                                 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
12167                                  * Only some of them are sleepable.
12168                                  */
12169                                 if (check_sleepable_lsm_hook(btf_id))
12170                                         ret = 0;
12171                                 break;
12172                         default:
12173                                 break;
12174                         }
12175                         if (ret) {
12176                                 bpf_log(log, "%s is not sleepable\n", tname);
12177                                 return ret;
12178                         }
12179                 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
12180                         if (tgt_prog) {
12181                                 bpf_log(log, "can't modify return codes of BPF programs\n");
12182                                 return -EINVAL;
12183                         }
12184                         ret = check_attach_modify_return(addr, tname);
12185                         if (ret) {
12186                                 bpf_log(log, "%s() is not modifiable\n", tname);
12187                                 return ret;
12188                         }
12189                 }
12190
12191                 break;
12192         }
12193         tgt_info->tgt_addr = addr;
12194         tgt_info->tgt_name = tname;
12195         tgt_info->tgt_type = t;
12196         return 0;
12197 }
12198
12199 static int check_attach_btf_id(struct bpf_verifier_env *env)
12200 {
12201         struct bpf_prog *prog = env->prog;
12202         struct bpf_prog *tgt_prog = prog->aux->dst_prog;
12203         struct bpf_attach_target_info tgt_info = {};
12204         u32 btf_id = prog->aux->attach_btf_id;
12205         struct bpf_trampoline *tr;
12206         int ret;
12207         u64 key;
12208
12209         if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
12210             prog->type != BPF_PROG_TYPE_LSM) {
12211                 verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
12212                 return -EINVAL;
12213         }
12214
12215         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
12216                 return check_struct_ops_btf_id(env);
12217
12218         if (prog->type != BPF_PROG_TYPE_TRACING &&
12219             prog->type != BPF_PROG_TYPE_LSM &&
12220             prog->type != BPF_PROG_TYPE_EXT)
12221                 return 0;
12222
12223         ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
12224         if (ret)
12225                 return ret;
12226
12227         if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
12228                 /* to make freplace equivalent to their targets, they need to
12229                  * inherit env->ops and expected_attach_type for the rest of the
12230                  * verification
12231                  */
12232                 env->ops = bpf_verifier_ops[tgt_prog->type];
12233                 prog->expected_attach_type = tgt_prog->expected_attach_type;
12234         }
12235
12236         /* store info about the attachment target that will be used later */
12237         prog->aux->attach_func_proto = tgt_info.tgt_type;
12238         prog->aux->attach_func_name = tgt_info.tgt_name;
12239
12240         if (tgt_prog) {
12241                 prog->aux->saved_dst_prog_type = tgt_prog->type;
12242                 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
12243         }
12244
12245         if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
12246                 prog->aux->attach_btf_trace = true;
12247                 return 0;
12248         } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
12249                 if (!bpf_iter_prog_supported(prog))
12250                         return -EINVAL;
12251                 return 0;
12252         }
12253
12254         if (prog->type == BPF_PROG_TYPE_LSM) {
12255                 ret = bpf_lsm_verify_prog(&env->log, prog);
12256                 if (ret < 0)
12257                         return ret;
12258         }
12259
12260         key = bpf_trampoline_compute_key(tgt_prog, btf_id);
12261         tr = bpf_trampoline_get(key, &tgt_info);
12262         if (!tr)
12263                 return -ENOMEM;
12264
12265         prog->aux->dst_trampoline = tr;
12266         return 0;
12267 }
12268
12269 struct btf *bpf_get_btf_vmlinux(void)
12270 {
12271         if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
12272                 mutex_lock(&bpf_verifier_lock);
12273                 if (!btf_vmlinux)
12274                         btf_vmlinux = btf_parse_vmlinux();
12275                 mutex_unlock(&bpf_verifier_lock);
12276         }
12277         return btf_vmlinux;
12278 }
12279
12280 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
12281               union bpf_attr __user *uattr)
12282 {
12283         u64 start_time = ktime_get_ns();
12284         struct bpf_verifier_env *env;
12285         struct bpf_verifier_log *log;
12286         int i, len, ret = -EINVAL;
12287         bool is_priv;
12288
12289         /* no program is valid */
12290         if (ARRAY_SIZE(bpf_verifier_ops) == 0)
12291                 return -EINVAL;
12292
12293         /* 'struct bpf_verifier_env' can be global, but since it's not small,
12294          * allocate/free it every time bpf_check() is called
12295          */
12296         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
12297         if (!env)
12298                 return -ENOMEM;
12299         log = &env->log;
12300
12301         len = (*prog)->len;
12302         env->insn_aux_data =
12303                 vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
12304         ret = -ENOMEM;
12305         if (!env->insn_aux_data)
12306                 goto err_free_env;
12307         for (i = 0; i < len; i++)
12308                 env->insn_aux_data[i].orig_idx = i;
12309         env->prog = *prog;
12310         env->ops = bpf_verifier_ops[env->prog->type];
12311         is_priv = bpf_capable();
12312
12313         bpf_get_btf_vmlinux();
12314
12315         /* grab the mutex to protect few globals used by verifier */
12316         if (!is_priv)
12317                 mutex_lock(&bpf_verifier_lock);
12318
12319         if (attr->log_level || attr->log_buf || attr->log_size) {
12320                 /* user requested verbose verifier output
12321                  * and supplied buffer to store the verification trace
12322                  */
12323                 log->level = attr->log_level;
12324                 log->ubuf = (char __user *) (unsigned long) attr->log_buf;
12325                 log->len_total = attr->log_size;
12326
12327                 ret = -EINVAL;
12328                 /* log attributes have to be sane */
12329                 if (log->len_total < 128 || log->len_total > UINT_MAX >> 2 ||
12330                     !log->level || !log->ubuf || log->level & ~BPF_LOG_MASK)
12331                         goto err_unlock;
12332         }
12333
12334         if (IS_ERR(btf_vmlinux)) {
12335                 /* Either gcc or pahole or kernel are broken. */
12336                 verbose(env, "in-kernel BTF is malformed\n");
12337                 ret = PTR_ERR(btf_vmlinux);
12338                 goto skip_full_check;
12339         }
12340
12341         env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
12342         if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
12343                 env->strict_alignment = true;
12344         if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
12345                 env->strict_alignment = false;
12346
12347         env->allow_ptr_leaks = bpf_allow_ptr_leaks();
12348         env->allow_uninit_stack = bpf_allow_uninit_stack();
12349         env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
12350         env->bypass_spec_v1 = bpf_bypass_spec_v1();
12351         env->bypass_spec_v4 = bpf_bypass_spec_v4();
12352         env->bpf_capable = bpf_capable();
12353
12354         if (is_priv)
12355                 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
12356
12357         env->explored_states = kvcalloc(state_htab_size(env),
12358                                        sizeof(struct bpf_verifier_state_list *),
12359                                        GFP_USER);
12360         ret = -ENOMEM;
12361         if (!env->explored_states)
12362                 goto skip_full_check;
12363
12364         ret = check_subprogs(env);
12365         if (ret < 0)
12366                 goto skip_full_check;
12367
12368         ret = check_btf_info(env, attr, uattr);
12369         if (ret < 0)
12370                 goto skip_full_check;
12371
12372         ret = check_attach_btf_id(env);
12373         if (ret)
12374                 goto skip_full_check;
12375
12376         ret = resolve_pseudo_ldimm64(env);
12377         if (ret < 0)
12378                 goto skip_full_check;
12379
12380         if (bpf_prog_is_dev_bound(env->prog->aux)) {
12381                 ret = bpf_prog_offload_verifier_prep(env->prog);
12382                 if (ret)
12383                         goto skip_full_check;
12384         }
12385
12386         ret = check_cfg(env);
12387         if (ret < 0)
12388                 goto skip_full_check;
12389
12390         ret = do_check_subprogs(env);
12391         ret = ret ?: do_check_main(env);
12392
12393         if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
12394                 ret = bpf_prog_offload_finalize(env);
12395
12396 skip_full_check:
12397         kvfree(env->explored_states);
12398
12399         if (ret == 0)
12400                 ret = check_max_stack_depth(env);
12401
12402         /* instruction rewrites happen after this point */
12403         if (is_priv) {
12404                 if (ret == 0)
12405                         opt_hard_wire_dead_code_branches(env);
12406                 if (ret == 0)
12407                         ret = opt_remove_dead_code(env);
12408                 if (ret == 0)
12409                         ret = opt_remove_nops(env);
12410         } else {
12411                 if (ret == 0)
12412                         sanitize_dead_code(env);
12413         }
12414
12415         if (ret == 0)
12416                 /* program is valid, convert *(u32*)(ctx + off) accesses */
12417                 ret = convert_ctx_accesses(env);
12418
12419         if (ret == 0)
12420                 ret = fixup_bpf_calls(env);
12421
12422         /* do 32-bit optimization after insn patching has done so those patched
12423          * insns could be handled correctly.
12424          */
12425         if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
12426                 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
12427                 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
12428                                                                      : false;
12429         }
12430
12431         if (ret == 0)
12432                 ret = fixup_call_args(env);
12433
12434         env->verification_time = ktime_get_ns() - start_time;
12435         print_verification_stats(env);
12436
12437         if (log->level && bpf_verifier_log_full(log))
12438                 ret = -ENOSPC;
12439         if (log->level && !log->ubuf) {
12440                 ret = -EFAULT;
12441                 goto err_release_maps;
12442         }
12443
12444         if (ret == 0 && env->used_map_cnt) {
12445                 /* if program passed verifier, update used_maps in bpf_prog_info */
12446                 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
12447                                                           sizeof(env->used_maps[0]),
12448                                                           GFP_KERNEL);
12449
12450                 if (!env->prog->aux->used_maps) {
12451                         ret = -ENOMEM;
12452                         goto err_release_maps;
12453                 }
12454
12455                 memcpy(env->prog->aux->used_maps, env->used_maps,
12456                        sizeof(env->used_maps[0]) * env->used_map_cnt);
12457                 env->prog->aux->used_map_cnt = env->used_map_cnt;
12458
12459                 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
12460                  * bpf_ld_imm64 instructions
12461                  */
12462                 convert_pseudo_ld_imm64(env);
12463         }
12464
12465         if (ret == 0)
12466                 adjust_btf_func(env);
12467
12468 err_release_maps:
12469         if (!env->prog->aux->used_maps)
12470                 /* if we didn't copy map pointers into bpf_prog_info, release
12471                  * them now. Otherwise free_used_maps() will release them.
12472                  */
12473                 release_maps(env);
12474
12475         /* extension progs temporarily inherit the attach_type of their targets
12476            for verification purposes, so set it back to zero before returning
12477          */
12478         if (env->prog->type == BPF_PROG_TYPE_EXT)
12479                 env->prog->expected_attach_type = 0;
12480
12481         *prog = env->prog;
12482 err_unlock:
12483         if (!is_priv)
12484                 mutex_unlock(&bpf_verifier_lock);
12485         vfree(env->insn_aux_data);
12486 err_free_env:
12487         kfree(env);
12488         return ret;
12489 }